/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct elk_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const void *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, stage,
                               key, key_size, kernel, kernel_size,
                               prog_data, prog_data_size,
                               NULL, 0, NULL, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct elk_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {};

   blorp_init_elk(&device->blorp, device, &device->isl_dev,
                  device->physical->compiler, &config);
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info->verx10) {
   case 70:
      device->blorp.exec = gfx7_blorp_exec;
      break;
   case 75:
      device->blorp.exec = gfx75_blorp_exec;
      break;
   case 80:
      device->blorp.exec = gfx8_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}

static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
   if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
      assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
      flags |= BLORP_BATCH_USE_COMPUTE;
   }

   blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   bool ok UNUSED;

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->address.bo,
         .offset = buffer->address.offset + offset,
         .mocs = anv_mocs(device, buffer->address.bo,
                          is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                  : ISL_SURF_USAGE_TEXTURE_BIT),
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = format,
                      .width = width,
                      .height = height,
                      .depth = 1,
                      .levels = 1,
                      .array_len = 1,
                      .samples = 1,
                      .row_pitch_B = row_pitch,
                      .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                       : ISL_SURF_USAGE_TEXTURE_BIT,
                      .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

/* Pick something high enough that it won't be used in core and low enough it
 * will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(device->info, image,
                                          aspect, usage, layout);
   }

   isl_surf_usage_flags_t mocs_usage =
      (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
      ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;

   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, mocs_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, 0),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color.  If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp; otherwise it will
       * only stomp the CCS to a particular value and won't care about the
       * format or clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = (union isl_color_value) {
            .f32 = { ANV_HZ_FC_VAL },
         };
      }
   }
}

static bool
get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
                                    const struct anv_image *image,
                                    VkImageAspectFlags aspect,
                                    struct blorp_surf *blorp_surf)
{

   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (!anv_surface_is_valid(&image->planes[plane].shadow_surface))
      return false;

   const struct anv_surface *surface = &image->planes[plane].shadow_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT),
      },
   };

   return true;
}

static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }

         struct blorp_surf dst_shadow_surf;
         if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                                 dst_image,
                                                 1UL << aspect_bit,
                                                 &dst_shadow_surf)) {
            for (unsigned i = 0; i < layer_count; i++) {
               blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                          &dst_shadow_surf, dst_level, dst_base_layer + i,
                          srcOffset.x, srcOffset.y,
                          dstOffset.x, dstOffset.y,
                          extent.width, extent.height);
            }
         }
      }
   } else {
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }

      struct blorp_surf dst_shadow_surf;
      if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                              dst_image, dst_mask,
                                              &dst_shadow_surf)) {
         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_shadow_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us.  For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
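   /* As an illustration (hypothetical example, not from the original
    * comment): a BC1-compressed image has a 64-bit block, so
    * linear_fmtl->bpb / 8 = 8 and the buffer is bound as R32G32_UINT,
    * with the extent below scaled down by the 4x4 compression block.
    */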
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   bool dst_has_shadow = false;
   struct blorp_surf dst_shadow_surf;
   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);

      dst_has_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                             anv_image, aspect,
                                             &dst_shadow_surf);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      if (dst_has_shadow) {
         blorp_copy(batch, &src->surf, src->level, src->offset.z,
                    &dst_shadow_surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y,
                    dst->offset.x, dst->offset.y,
                    extent.width, extent.height);
      }

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}
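
/* Illustrative example (hypothetical values, not from the original code):
 * for one axis with srcOffsets 10..2 and dstOffsets 0..8, the source pair
 * is swapped to 2..10 and flip toggles to true, while the destination pair
 * is left alone, so blorp receives ordered rectangles plus a mirror flag
 * for that axis.
 */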

static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, src_image->vk.format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here.
       */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);
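
      /* For example (hypothetical numbers, not in the original code):
       * minifying a 4-slice 3D source onto 2 destination layers gives
       * src_z_step = 2.0 and depth_center_offset = 1.0, so the loop below
       * samples src_z = 1.0 and 3.0, the center of each 2-slice group.
       */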

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}
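
/* A worked example (hypothetical values, not from the original code):
 * gcd_pow2_u64(24, 16) computes a_log2 = ffsll(24) - 1 = 3 and
 * b_log2 = ffsll(16) - 1 = 4, so it returns 1 << MIN2(3, 4) = 8, the
 * largest power of two dividing both inputs.
 */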

/* This is the maximum possible width/height our HW can handle. */
#define MAX_SURFACE_DIM (1ull << 14)

static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       ISL_SURF_USAGE_TEXTURE_BIT),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       ISL_SURF_USAGE_RENDER_TARGET_BIT),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}


void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer.  If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;
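
   /* For instance (hypothetical numbers): a VK_WHOLE_SIZE fill with 11
    * bytes remaining past the offset is rounded down to 8 bytes here.
    */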

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);
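
   /* For example (hypothetical values, not from the original code):
    * dstOffset = 12 (largest power-of-two divisor 4) and fillSize = 40
    * (divisor 8) reduce bs from 16 to 4, so the fill goes through an
    * R32_UINT surface.
    */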

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_aspect(cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = u_minify(image->vk.extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   bool has_stencil_shadow = false;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = u_minify(image->vk.extent.width, level);
         const unsigned level_height = u_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = u_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, 0 /* color_write_disable */);
         }
      }
   }

   anv_blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const uint32_t att_idx = attachment->colorAttachment;
   assert(att_idx < gfx->color_att_count);
   const struct anv_attachment *att = &gfx->color_att[att_idx];

   if (att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const struct anv_attachment *d_att = &gfx->depth_att;
   const struct anv_attachment *s_att = &gfx->stencil_att;
   if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
       s_att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
      depth_format = anv_get_isl_format(cmd_buffer->device->info,
                                        d_att->vk_format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      gfx->null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
   if (cmd_buffer->state.conditional_render_enabled) {
      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }
   anv_blorp_batch_init(cmd_buffer, &batch, flags);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   anv_blorp_batch_finish(&batch);
}

void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(src_image->vk.samples > 1);
   assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(dst_image->vk.samples == 1);

   struct blorp_surf src_surf, dst_surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                src_aux_usage, &src_surf);
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }

   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   anv_blorp_batch_finish(&batch);
}

static void
resolve_image(struct anv_cmd_buffer *cmd_buffer,
              struct anv_image *src_image,
              VkImageLayout src_image_layout,
              struct anv_image *dst_image,
              VkImageLayout dst_image_layout,
              const VkImageResolve2 *region)
{
   assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

   const uint32_t layer_count =
      vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);

   anv_foreach_image_aspect_bit(aspect_bit, src_image,
                                region->srcSubresource.aspectMask) {
      enum isl_aux_usage src_aux_usage =
         anv_layout_to_aux_usage(cmd_buffer->device->info, src_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                 src_image_layout);
      enum isl_aux_usage dst_aux_usage =
         anv_layout_to_aux_usage(cmd_buffer->device->info, dst_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                 dst_image_layout);

      anv_image_msaa_resolve(cmd_buffer,
                             src_image, src_aux_usage,
                             region->srcSubresource.mipLevel,
                             region->srcSubresource.baseArrayLayer,
                             dst_image, dst_aux_usage,
                             region->dstSubresource.mipLevel,
                             region->dstSubresource.baseArrayLayer,
                             (1 << aspect_bit),
                             region->srcOffset.x,
                             region->srcOffset.y,
                             region->dstOffset.x,
                             region->dstOffset.y,
                             region->extent.width,
                             region->extent.height,
                             layer_count, BLORP_FILTER_NONE);
   }
}

void anv_CmdResolveImage2(
    VkCommandBuffer                             commandBuffer,
    const VkResolveImageInfo2*                  pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[r]);
   }
}

void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        /* If the sample count is set, we are in a render pass
                         * and don't want blorp to overwrite the depth/stencil
                         * state.
                         */
                        cmd_buffer->state.gfx.samples ?
                        BLORP_BATCH_NO_EMIT_DEPTH_STENCIL : 0);
1487
1488 /* We don't know who touched the main surface last so flush a bunch of
1489 * caches to ensure we get good data.
1490 */
1491 anv_add_pending_pipe_bits(cmd_buffer,
1492 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1493 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1494 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1495 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
1496 "before copy_to_shadow");
1497
1498 struct blorp_surf surf;
1499 get_blorp_surf_for_anv_image(cmd_buffer->device,
1500 image, aspect,
1501 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1502 VK_IMAGE_LAYOUT_GENERAL,
1503 ISL_AUX_USAGE_NONE, &surf);
1504 assert(surf.aux_usage == ISL_AUX_USAGE_NONE);
1505
1506 struct blorp_surf shadow_surf;
1507 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
1508 image, aspect, &shadow_surf);
1509
1510 for (uint32_t l = 0; l < level_count; l++) {
1511 const uint32_t level = base_level + l;
1512
1513 const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level);
1514
1515 if (image->vk.image_type == VK_IMAGE_TYPE_3D)
1516 layer_count = extent.depth;
1517
1518 for (uint32_t a = 0; a < layer_count; a++) {
1519 const uint32_t layer = base_layer + a;
1520
1521 blorp_copy(&batch, &surf, level, layer,
1522 &shadow_surf, level, layer,
1523 0, 0, 0, 0, extent.width, extent.height);
1524 }
1525 }
1526
1527 /* We just wrote to the buffer with the render cache. Flush it. */
1528 anv_add_pending_pipe_bits(cmd_buffer,
1529 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
1530 "after copy_to_shadow");
1531
1532 anv_blorp_batch_finish(&batch);
1533 }
1534
1535 void
anv_image_clear_color(struct anv_cmd_buffer * cmd_buffer,const struct anv_image * image,VkImageAspectFlagBits aspect,enum isl_aux_usage aux_usage,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t base_layer,uint32_t layer_count,VkRect2D area,union isl_color_value clear_color)1536 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
1537 const struct anv_image *image,
1538 VkImageAspectFlagBits aspect,
1539 enum isl_aux_usage aux_usage,
1540 enum isl_format format, struct isl_swizzle swizzle,
1541 uint32_t level, uint32_t base_layer, uint32_t layer_count,
1542 VkRect2D area, union isl_color_value clear_color)
1543 {
1544 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1545
1546 /* We don't support planar images with multisampling yet */
1547 assert(image->n_planes == 1);
1548
1549 struct blorp_batch batch;
1550 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1551
1552 struct blorp_surf surf;
1553 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1554 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1555 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1556 aux_usage, &surf);
1557 anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
1558 level, base_layer, layer_count);
1559
1560 blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
1561 level, base_layer, layer_count,
1562 area.offset.x, area.offset.y,
1563 area.offset.x + area.extent.width,
1564 area.offset.y + area.extent.height,
1565 clear_color, 0 /* color_write_disable */);
1566
1567 anv_blorp_batch_finish(&batch);
1568 }
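/* Illustrative sketch (not part of the driver): clearing level 0, layer 0
 * of a single-sampled image to opaque black; the format and aux usage here
 * are hypothetical.
 *
 *    const VkRect2D area = {
 *       .offset = { 0, 0 },
 *       .extent = { image->vk.extent.width, image->vk.extent.height },
 *    };
 *    const union isl_color_value black = {
 *       .f32 = { 0.0f, 0.0f, 0.0f, 1.0f },
 *    };
 *    anv_image_clear_color(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
 *                          ISL_AUX_USAGE_NONE, ISL_FORMAT_R8G8B8A8_UNORM,
 *                          ISL_SWIZZLE_IDENTITY, 0 /* level */,
 *                          0 /* base_layer */, 1 /* layer_count */,
 *                          area, black);
 */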
1569
1570 void
1571 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
1572 const struct anv_image *image,
1573 VkImageAspectFlags aspects,
1574 enum isl_aux_usage depth_aux_usage,
1575 uint32_t level,
1576 uint32_t base_layer, uint32_t layer_count,
1577 VkRect2D area,
1578 float depth_value, uint8_t stencil_value)
1579 {
1580 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1581 VK_IMAGE_ASPECT_STENCIL_BIT));
1582
1583 struct blorp_batch batch;
1584 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1585 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1586
1587 struct blorp_surf depth = {};
1588 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1589 get_blorp_surf_for_anv_image(cmd_buffer->device,
1590 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1591 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1592 depth_aux_usage, &depth);
1593 }
1594
1595 struct blorp_surf stencil = {};
1596 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1597 const uint32_t plane =
1598 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
1599 get_blorp_surf_for_anv_image(cmd_buffer->device,
1600 image, VK_IMAGE_ASPECT_STENCIL_BIT,
1601 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1602 image->planes[plane].aux_usage, &stencil);
1603 }
1604
1605 /* Blorp may choose to clear stencil using RGBA32_UINT for better
1606 * performance. If it does this, we need to flush it out of the depth
1607 * cache before rendering to it.
1608 */
1609 anv_add_pending_pipe_bits(cmd_buffer,
1610 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1611 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1612 "before clear DS");
1613
1614 blorp_clear_depth_stencil(&batch, &depth, &stencil,
1615 level, base_layer, layer_count,
1616 area.offset.x, area.offset.y,
1617 area.offset.x + area.extent.width,
1618 area.offset.y + area.extent.height,
1619 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
1620 depth_value,
1621 (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
1622 stencil_value);
1623
1624 /* Blorp may choose to clear stencil using RGBA32_UINT for better
1625 * performance. If it does this, we need to flush it out of the render
1626 * cache before someone starts trying to do stencil on it.
1627 */
1628 anv_add_pending_pipe_bits(cmd_buffer,
1629 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1630 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1631 "after clear DS");
1632
1633 struct blorp_surf stencil_shadow;
1634 if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1635 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
1636 VK_IMAGE_ASPECT_STENCIL_BIT,
1637 &stencil_shadow)) {
1638 union isl_color_value stencil_color = {
1639 .u32 = { stencil_value },
1640 };
1641 blorp_clear(&batch, &stencil_shadow,
1642 ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
1643 level, base_layer, layer_count,
1644 area.offset.x, area.offset.y,
1645 area.offset.x + area.extent.width,
1646 area.offset.y + area.extent.height,
1647 stencil_color, 0 /* color_write_disable */);
1648 }
1649
1650 anv_blorp_batch_finish(&batch);
1651 }
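/* Illustrative sketch (not part of the driver): clearing both aspects of a
 * combined depth/stencil image to depth 1.0 and stencil 0; the aux usage
 * and area are hypothetical.
 *
 *    anv_image_clear_depth_stencil(cmd_buffer, image,
 *                                  VK_IMAGE_ASPECT_DEPTH_BIT |
 *                                  VK_IMAGE_ASPECT_STENCIL_BIT,
 *                                  ISL_AUX_USAGE_NONE,
 *                                  0 /* level */,
 *                                  0 /* base_layer */, 1 /* layer_count */,
 *                                  area, 1.0f /* depth_value */,
 *                                  0 /* stencil_value */);
 */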
1652
1653 void
1654 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
1655 const struct anv_image *image,
1656 VkImageAspectFlagBits aspect, uint32_t level,
1657 uint32_t base_layer, uint32_t layer_count,
1658 enum isl_aux_op hiz_op)
1659 {
1660 assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
1661 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
1662 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1663 assert(plane == 0);
1664
1665 struct blorp_batch batch;
1666 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1667 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1668
1669 struct blorp_surf surf;
1670 get_blorp_surf_for_anv_image(cmd_buffer->device,
1671 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1672 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1673 image->planes[plane].aux_usage, &surf);
1674
1675 blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);
1676
1677 anv_blorp_batch_finish(&batch);
1678 }
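/* Illustrative sketch (not part of the driver): a full HiZ resolve of
 * level 0 across all of its aux layers might look like this.
 *
 *    anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
 *                     0 /* level */, 0 /* base_layer */,
 *                     anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, 0),
 *                     ISL_AUX_OP_FULL_RESOLVE);
 */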
1679
1680 void
1681 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
1682 const struct anv_image *image,
1683 VkImageAspectFlags aspects,
1684 uint32_t level,
1685 uint32_t base_layer, uint32_t layer_count,
1686 VkRect2D area, uint8_t stencil_value)
1687 {
1688 assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1689 VK_IMAGE_ASPECT_STENCIL_BIT));
1690
1691 struct blorp_batch batch;
1692 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1693 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1694
1695 struct blorp_surf depth = {};
1696 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
1697 const uint32_t plane =
1698 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
1699 assert(base_layer + layer_count <=
1700 anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
1701 get_blorp_surf_for_anv_image(cmd_buffer->device,
1702 image, VK_IMAGE_ASPECT_DEPTH_BIT,
1703 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1704 image->planes[plane].aux_usage, &depth);
1705 }
1706
1707 struct blorp_surf stencil = {};
1708 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1709 const uint32_t plane =
1710 anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
1711 get_blorp_surf_for_anv_image(cmd_buffer->device,
1712 image, VK_IMAGE_ASPECT_STENCIL_BIT,
1713 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1714 image->planes[plane].aux_usage, &stencil);
1715 }
1716
1717 /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
1718 *
1719 * "The following is required when performing a depth buffer clear with
1720 * using the WM_STATE or 3DSTATE_WM:
1721 *
1722 * * If other rendering operations have preceded this clear, a
1723 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1724 * enabled must be issued before the rectangle primitive used for
1725 * the depth buffer clear operation.
1726 * * [...]"
1727 *
1728 * Even though the PRM only says that this is required if using 3DSTATE_WM
1729 * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
1730 * hangs when doing a clear with WM_HZ_OP.
1731 */
1732 anv_add_pending_pipe_bits(cmd_buffer,
1733 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1734 ANV_PIPE_DEPTH_STALL_BIT,
1735 "before clear hiz");
1736
1737 blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
1738 level, base_layer, layer_count,
1739 area.offset.x, area.offset.y,
1740 area.offset.x + area.extent.width,
1741 area.offset.y + area.extent.height,
1742 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
1743 ANV_HZ_FC_VAL,
1744 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
1745 stencil_value);
1746
1747 anv_blorp_batch_finish(&batch);
1748
1749 /* From the SKL PRM, Depth Buffer Clear:
1750 *
1751 * "Depth Buffer Clear Workaround
1752 *
1753 * Depth buffer clear pass using any of the methods (WM_STATE,
1754 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
1755 * command with DEPTH_STALL bit and Depth FLUSH bits “set” before
1756 * starting to render. DepthStall and DepthFlush are not needed between
1757 * consecutive depth clear passes nor is it required if the depth-clear
1758 * pass was done with “full_surf_clear” bit set in the
1759 * 3DSTATE_WM_HZ_OP."
1760 *
1761 * Even though the PRM provides a bunch of conditions under which this is
1762 * supposedly unnecessary, we choose to perform the flush unconditionally
1763 * just to be safe.
1764 */
1765 anv_add_pending_pipe_bits(cmd_buffer,
1766 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
1767 ANV_PIPE_DEPTH_STALL_BIT,
1768 "after clear hiz");
1769 }
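/* Unlike anv_image_clear_depth_stencil() above, this path always clears
 * depth to ANV_HZ_FC_VAL (the fast-clear value tracked by HiZ), which is
 * why the function takes only a stencil value. An illustrative sketch (not
 * part of the driver), with a hypothetical area:
 *
 *    anv_image_hiz_clear(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
 *                        0 /* level */, 0 /* base_layer */,
 *                        1 /* layer_count */, area, 0 /* stencil_value */);
 */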
1770
1771 void
1772 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
1773 const struct anv_image *image,
1774 enum isl_format format, struct isl_swizzle swizzle,
1775 VkImageAspectFlagBits aspect,
1776 uint32_t base_layer, uint32_t layer_count,
1777 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
1778 bool predicate)
1779 {
1780 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1781 assert(image->vk.samples > 1);
1782 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
1783
1784 /* Multisampling with multi-planar formats is not supported */
1785 assert(image->n_planes == 1);
1786
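/* BLORP_BATCH_PREDICATE_ENABLE * predicate evaluates to the flag when
 * predicate is true and to 0 otherwise, so the batch is predicated only
 * when requested.
 */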
1787 struct blorp_batch batch;
1788 anv_blorp_batch_init(cmd_buffer, &batch,
1789 BLORP_BATCH_PREDICATE_ENABLE * predicate);
1790 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1791
1792 struct blorp_surf surf;
1793 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1794 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1795 ISL_AUX_USAGE_MCS, &surf);
1796
1797 /* Blorp will store the clear color for us if we provide the clear color
1798 * address and we are doing a fast clear. So we save the clear value into
1799 * the blorp surface.
1800 */
1801 if (clear_value)
1802 surf.clear_color = *clear_value;
1803
1804 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1805 *
1806 * "After Render target fast clear, pipe-control with color cache
1807 * write-flush must be issued before sending any DRAW commands on
1808 * that render target."
1809 *
1810 * This comment is a bit cryptic and doesn't really tell you what's going
1811 * on or what's really needed. It appears that fast clear ops are not
1812 * properly synchronized with other drawing. This means that we cannot
1813 * have a fast clear operation in the pipe at the same time as other
1814 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1815 * that the contents of the previous draw hit the render target before we
1816 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1817 * that it is completed before any additional drawing occurs.
1818 */
1819 anv_add_pending_pipe_bits(cmd_buffer,
1820 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1821 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1822 ANV_PIPE_PSS_STALL_SYNC_BIT |
1823 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1824 "before fast clear mcs");
1825
1826 if (!blorp_address_is_null(surf.clear_color_addr)) {
1827 anv_add_pending_pipe_bits(cmd_buffer,
1828 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
1829 "before blorp clear color edit");
1830 }
1831
1832 switch (mcs_op) {
1833 case ISL_AUX_OP_FAST_CLEAR:
1834 blorp_fast_clear(&batch, &surf, format, swizzle,
1835 0, base_layer, layer_count,
1836 0, 0, image->vk.extent.width, image->vk.extent.height);
1837 break;
1838 case ISL_AUX_OP_PARTIAL_RESOLVE:
1839 blorp_mcs_partial_resolve(&batch, &surf, format,
1840 base_layer, layer_count);
1841 break;
1842 case ISL_AUX_OP_FULL_RESOLVE:
1843 case ISL_AUX_OP_AMBIGUATE:
1844 default:
1845 unreachable("Unsupported MCS operation");
1846 }
1847
1848 anv_add_pending_pipe_bits(cmd_buffer,
1849 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1850 ANV_PIPE_PSS_STALL_SYNC_BIT |
1851 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1852 "after fast clear mcs");
1853
1854 anv_blorp_batch_finish(&batch);
1855 }
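/* Illustrative sketch (not part of the driver): an unpredicated MCS fast
 * clear of every layer of a multisampled image; the format and clear value
 * are hypothetical.
 *
 *    union isl_color_value clear_value = { .f32 = { 0, 0, 0, 1 } };
 *    anv_image_mcs_op(cmd_buffer, image,
 *                     ISL_FORMAT_B8G8R8A8_UNORM, ISL_SWIZZLE_IDENTITY,
 *                     VK_IMAGE_ASPECT_COLOR_BIT,
 *                     0 /* base_layer */, image->vk.array_layers,
 *                     ISL_AUX_OP_FAST_CLEAR, &clear_value,
 *                     false /* predicate */);
 */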
1856
1857 void
1858 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
1859 const struct anv_image *image,
1860 enum isl_format format, struct isl_swizzle swizzle,
1861 VkImageAspectFlagBits aspect, uint32_t level,
1862 uint32_t base_layer, uint32_t layer_count,
1863 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
1864 bool predicate)
1865 {
1866 assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1867 assert(image->vk.samples == 1);
1868 assert(level < anv_image_aux_levels(image, aspect));
1869 /* Multi-LOD YCbCr is not allowed */
1870 assert(image->n_planes == 1 || level == 0);
1871 assert(base_layer + layer_count <=
1872 anv_image_aux_layers(image, aspect, level));
1873
1874 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1875
1876 struct blorp_batch batch;
1877 anv_blorp_batch_init(cmd_buffer, &batch,
1878 BLORP_BATCH_PREDICATE_ENABLE * predicate);
1879 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1880
1881 struct blorp_surf surf;
1882 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1883 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1884 image->planes[plane].aux_usage,
1885 &surf);
1886
1887 uint32_t level_width = u_minify(surf.surf->logical_level0_px.w, level);
1888 uint32_t level_height = u_minify(surf.surf->logical_level0_px.h, level);
1889
1890 /* Blorp will store the clear color for us if we provide the clear color
1891 * address and we are doing a fast clear. So we save the clear value into
1892 * the blorp surface.
1893 */
1894 if (clear_value)
1895 surf.clear_color = *clear_value;
1896
1897 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1898 *
1899 * "After Render target fast clear, pipe-control with color cache
1900 * write-flush must be issued before sending any DRAW commands on
1901 * that render target."
1902 *
1903 * This comment is a bit cryptic and doesn't really tell you what's going
1904 * on or what's really needed. It appears that fast clear ops are not
1905 * properly synchronized with other drawing. This means that we cannot
1906 * have a fast clear operation in the pipe at the same time as other
1907 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1908 * that the contents of the previous draw hit the render target before we
1909 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1910 * that it is completed before any additional drawing occurs.
1911 */
1912 anv_add_pending_pipe_bits(cmd_buffer,
1913 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1914 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1915 ANV_PIPE_PSS_STALL_SYNC_BIT |
1916 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1917 "before fast clear ccs");
1918
1919 if (!blorp_address_is_null(surf.clear_color_addr)) {
1920 anv_add_pending_pipe_bits(cmd_buffer,
1921 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
1922 "before blorp clear color edit");
1923 }
1924
1925 switch (ccs_op) {
1926 case ISL_AUX_OP_FAST_CLEAR:
1927 blorp_fast_clear(&batch, &surf, format, swizzle,
1928 level, base_layer, layer_count,
1929 0, 0, level_width, level_height);
1930 break;
1931 case ISL_AUX_OP_FULL_RESOLVE:
1932 case ISL_AUX_OP_PARTIAL_RESOLVE:
1933 blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
1934 format, ccs_op);
1935 break;
1936 case ISL_AUX_OP_AMBIGUATE:
1937 for (uint32_t a = 0; a < layer_count; a++) {
1938 const uint32_t layer = base_layer + a;
1939 blorp_ccs_ambiguate(&batch, &surf, level, layer);
1940 }
1941 break;
1942 default:
1943 unreachable("Unsupported CCS operation");
1944 }
1945
1946 anv_add_pending_pipe_bits(cmd_buffer,
1947 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1948 ANV_PIPE_PSS_STALL_SYNC_BIT |
1949 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1950 "after fast clear ccs");
1951
1952 anv_blorp_batch_finish(&batch);
1953 }
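/* Illustrative sketch (not part of the driver): ambiguating the CCS for
 * level 0, layer 0, which needs no clear color; the format is hypothetical.
 *
 *    anv_image_ccs_op(cmd_buffer, image,
 *                     ISL_FORMAT_R8G8B8A8_UNORM, ISL_SWIZZLE_IDENTITY,
 *                     VK_IMAGE_ASPECT_COLOR_BIT, 0 /* level */,
 *                     0 /* base_layer */, 1 /* layer_count */,
 *                     ISL_AUX_OP_AMBIGUATE, NULL /* clear_value */,
 *                     false /* predicate */);
 */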
1954