/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"

/**
 * Translate one Vulkan component swizzle into an ISL channel select.
 *
 * ZERO and ONE map to the matching ISL constants.  R/G/B/A return whatever
 * format_swizzle selects for that channel.
 *
 * Any other value (VK_COMPONENT_SWIZZLE_IDENTITY included) reaches
 * unreachable(); presumably the caller has already resolved IDENTITY --
 * confirm against the caller.
 */
static enum isl_channel_select
remap_swizzle(VkComponentSwizzle swizzle,
              struct isl_swizzle format_swizzle)
{
   switch (swizzle) {
   case VK_COMPONENT_SWIZZLE_ZERO:
      return ISL_CHANNEL_SELECT_ZERO;
   case VK_COMPONENT_SWIZZLE_ONE:
      return ISL_CHANNEL_SELECT_ONE;
   case VK_COMPONENT_SWIZZLE_R:
      return format_swizzle.r;
   case VK_COMPONENT_SWIZZLE_G:
      return format_swizzle.g;
   case VK_COMPONENT_SWIZZLE_B:
      return format_swizzle.b;
   case VK_COMPONENT_SWIZZLE_A:
      return format_swizzle.a;
   default:
      unreachable("Invalid swizzle");
   }
}

/**
 * Fill a surface state for one aspect of an image.
 *
 * The state is written into state_inout->state_data.data and, when
 * state_inout->state.map is non-NULL, copied there as well.  The main, aux
 * and clear-color addresses that were used are recorded in state_inout.
 *
 * @param device       Device providing the ISL device and feature info.
 * @param image        Image the surface state refers to.
 * @param aspect       Aspect selecting the image plane.
 * @param view_in      ISL view to use; it is copied, never modified.
 * @param view_usage   Usage bits OR'ed into the copied view's usage.
 * @param aux_usage    Auxiliary surface usage; ISL_AUX_USAGE_NONE leaves the
 *                     aux address null.
 * @param clear_color  Clear color, or NULL to use the default (zero, or the
 *                     HiZ clear value for the depth aspect).
 * @param flags        Not referenced by this function body.
 * @param state_inout  Surface state to fill.
 */
void
anv_image_fill_surface_state(struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlagBits aspect,
                             const struct isl_view *view_in,
                             isl_surf_usage_flags_t view_usage,
                             enum isl_aux_usage aux_usage,
                             const union isl_color_value *clear_color,
                             enum anv_image_view_state_flags flags,
                             struct anv_surface_state *state_inout)
{
   uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (image->emu_plane_format != VK_FORMAT_UNDEFINED) {
      const uint16_t view_bpb = isl_format_get_layout(view_in->format)->bpb;
      const uint16_t plane_bpb = isl_format_get_layout(
            image->planes[plane].primary_surface.isl.format)->bpb;

      /* We should redirect to the hidden plane when the original view format
       * is compressed or when the view usage is storage.  But we don't always
       * have visibility to the original view format so we also check for size
       * compatibility.
       */
      if (isl_format_is_compressed(view_in->format) ||
          (view_usage & ISL_SURF_USAGE_STORAGE_BIT) ||
          view_bpb != plane_bpb) {
         /* The hidden plane is indexed one past the regular planes. */
         plane = image->n_planes;
         assert(isl_format_get_layout(
                   image->planes[plane].primary_surface.isl.format)->bpb ==
                view_bpb);
      }
   }

   const struct anv_surface *surface = &image->planes[plane].primary_surface,
      *aux_surface = &image->planes[plane].aux_surface;

   /* Work on a private copy so the caller's view is left untouched.
    *
    * NOTE(review): view.usage is computed before the PROTECTED bit is
    * propagated into view_usage below, so the bit only reaches the
    * anv_mocs() call and not view.usage -- confirm this is intended.
    */
   struct isl_view view = *view_in;
   view.usage |= view_usage;

   /* Propagate the protection flag of the image to the view. */
   view_usage |= surface->isl.usage & ISL_SURF_USAGE_PROTECTED_BIT;

   /* NOTE(review): this is an exact comparison performed after the
    * PROTECTED bit may have been OR'ed in above, so it does not match when
    * the surface carries ISL_SURF_USAGE_PROTECTED_BIT -- confirm whether
    * protected render targets are meant to skip the render swizzle.
    */
   if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
      view.swizzle = anv_swizzle_for_render(view.swizzle);

   /* If this is a HiZ buffer we can sample from with a programmable clear
    * value (SKL+), define the clear value to the optimal constant.
    */
   union isl_color_value default_clear_color = { .u32 = { 0, } };
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT)
      default_clear_color = anv_image_hiz_clear_value(image);
   if (!clear_color)
      clear_color = &default_clear_color;

   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   void *surface_state_map = state_inout->state_data.data;

   const struct isl_surf *isl_surf = &surface->isl;

   /* Only used for an uncompressed view of a compressed surface; the offsets
    * stay zero otherwise.
    */
   struct isl_surf tmp_surf;
   uint64_t offset_B = 0;
   uint32_t tile_x_sa = 0, tile_y_sa = 0;
   if (isl_format_is_compressed(surface->isl.format) &&
       !isl_format_is_compressed(view.format)) {
      /* We're creating an uncompressed view of a compressed surface. This is
       * allowed but only for a single level/layer.
       */
      assert(surface->isl.samples == 1);
      assert(view.levels == 1);

      /* Note that view is both an input and an output of this call. */
      ASSERTED bool ok =
         isl_surf_get_uncompressed_surf(&device->isl_dev, isl_surf, &view,
                                        &tmp_surf, &view,
                                        &offset_B, &tile_x_sa, &tile_y_sa);
      assert(ok);
      isl_surf = &tmp_surf;
   }

   state_inout->address = anv_address_add(address, offset_B);

   struct anv_address aux_address = ANV_NULL_ADDRESS;
   if (aux_usage != ISL_AUX_USAGE_NONE)
      aux_address = anv_image_address(image, &aux_surface->memory_range);
   state_inout->aux_address = aux_address;

   /* A clear-color address is only used on ver >= 10 and only for aux usages
    * that support fast clears.
    */
   struct anv_address clear_address = ANV_NULL_ADDRESS;
   if (device->info->ver >= 10 && isl_aux_usage_has_fast_clears(aux_usage)) {
      clear_address = anv_image_get_clear_color_addr(device, image, aspect);
   }
   state_inout->clear_address = clear_address;

   if (image->vk.create_flags & VK_IMAGE_CREATE_PROTECTED_BIT)
      view_usage |= ISL_SURF_USAGE_PROTECTED_BIT;

   isl_surf_fill_state(&device->isl_dev, surface_state_map,
                       .surf = isl_surf,
                       .view = &view,
                       .address = anv_address_physical(state_inout->address),
                       .clear_color = *clear_color,
                       .aux_surf = &aux_surface->isl,
                       .aux_usage = aux_usage,
                       .aux_address = anv_address_physical(aux_address),
                       .clear_address = anv_address_physical(clear_address),
                       .use_clear_address =
                          !anv_address_is_null(clear_address),
                       .mocs = anv_mocs(device, state_inout->address.bo,
                                        view_usage),
                       .x_offset_sa = tile_x_sa,
                       .y_offset_sa = tile_y_sa,
                       /* Assume robustness with EXT_pipeline_robustness
                        * because this can be turned on/off per pipeline and
                        * we have no visibility on this here.
                        */
                       .robust_image_access =
                          device->vk.enabled_features.robustImageAccess ||
                          device->vk.enabled_features.robustImageAccess2 ||
                          device->vk.enabled_extensions.EXT_pipeline_robustness);

   /* With the exception of gfx8, the bottom 12 bits of the MCS base address
    * are used to store other information.  This should be ok, however, because
    * the surface buffer addresses are always 4K page aligned.
    *
    * The low bits that ISL packed into the state are merged into the recorded
    * offset below.
    */
   if (!anv_address_is_null(aux_address)) {
      uint32_t *aux_addr_dw = surface_state_map +
         device->isl_dev.ss.aux_addr_offset;
      assert((aux_address.offset & 0xfff) == 0);
      state_inout->aux_address.offset |= *aux_addr_dw & 0xfff;
   }

   /* Same treatment for the clear-color address, using its low 6 bits. */
   if (device->info->ver >= 10 && clear_address.bo) {
      uint32_t *clear_addr_dw = surface_state_map +
         device->isl_dev.ss.clear_color_state_offset;
      assert((clear_address.offset & 0x3f) == 0);
      state_inout->clear_address.offset |= *clear_addr_dw & 0x3f;
   }

   /* Mirror the packed state into the mapped copy when there is one. */
   if (state_inout->state.map)
      memcpy(state_inout->state.map, surface_state_map, ANV_SURFACE_STATE_SIZE);
}

/**
 * Return the number of planes covered by an aspect mask, i.e. the number of
 * bits set in it, after asserting that the aspect set is valid.
 */
static uint32_t
anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask)
{
   anv_assert_valid_aspect_set(aspect_mask);
   return util_bitcount(aspect_mask);
}

/**
 * Decide whether a depth/stencil clear of the given view can be done as a
 * HiZ fast clear.
 *
 * @param device             Device (hardware info is read from it).
 * @param iview              Depth/stencil view being cleared.
 * @param layout             Image layout at the time of the clear.
 * @param clear_aspects      Aspects being cleared.
 * @param depth_clear_value  Requested depth clear value.
 * @param render_area        Area being cleared.
 * @param queue_flags        Queue flags forwarded to the aux-usage query.
 *
 * @return true if the clear can be a fast clear, false otherwise.
 */
bool
anv_can_hiz_clear_ds_view(struct anv_device *device,
                          const struct anv_image_view *iview,
                          VkImageLayout layout,
                          VkImageAspectFlags clear_aspects,
                          float depth_clear_value,
                          VkRect2D render_area,
                          const VkQueueFlagBits queue_flags)
{
   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
      return false;

   /* If we're just clearing stencil, we can always HiZ clear */
   if (!(clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      return true;

   /* We must have depth in order to have HiZ */
   if (!(iview->image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      return false;

   const enum isl_aux_usage clear_aux_usage =
      anv_layout_to_aux_usage(device->info, iview->image,
                              VK_IMAGE_ASPECT_DEPTH_BIT,
                              VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                              layout, queue_flags);
   if (!isl_aux_usage_has_fast_clears(clear_aux_usage))
      return false;

   if (isl_aux_usage_has_ccs(clear_aux_usage)) {
      /* From the TGL PRM, Vol 9, "Compressed Depth Buffers" (under the
       * "Texture performant" and "ZCS" columns):
       *
       *    Update with clear at either 16x8 or 8x4 granularity, based on
       *    fs_clr or otherwise.
       *
       * Although alignment requirements are only listed for the texture
       * performant mode, test results indicate that requirements exist for
       * the non-texture performant mode as well. Disable partial clears.
       *
       * NOTE(review): the extent is minified by base_mip_level here, while
       * anv_can_fast_clear_color_view() compares render_area against
       * iview->vk.extent directly -- confirm which level the view extent
       * refers to.
       */
      if (render_area.offset.x > 0 ||
          render_area.offset.y > 0 ||
          render_area.extent.width !=
          u_minify(iview->vk.extent.width, iview->vk.base_mip_level) ||
          render_area.extent.height !=
          u_minify(iview->vk.extent.height, iview->vk.base_mip_level)) {
         return false;
      }

      /* When fast-clearing, hardware behaves in unexpected ways if the clear
       * rectangle, aligned to 16x8, could cover neighboring LODs.
       * Fortunately, ISL guarantees that LOD0 will be 8-row aligned and
       * LOD0's height seems to not matter. Also, few applications ever clear
       * LOD1+. Only allow fast-clearing upper LODs if no overlap can occur.
       */
      const struct isl_surf *surf =
         &iview->image->planes[0].primary_surface.isl;
      assert(isl_surf_usage_is_depth(surf->usage));
      assert(surf->dim_layout == ISL_DIM_LAYOUT_GFX4_2D);
      assert(surf->array_pitch_el_rows % 8 == 0);
      if (clear_aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT &&
          iview->vk.base_mip_level >= 1 &&
          (iview->vk.extent.width % 32 != 0 ||
           surf->image_alignment_el.h % 8 != 0)) {
         return false;
      }
   }

   /* On ver <= 12, only the value reported by anv_image_hiz_clear_value()
    * may be fast-cleared.
    */
   if (device->info->ver <= 12 &&
       depth_clear_value != anv_image_hiz_clear_value(iview->image).f32[0])
      return false;

   /* If we got here, then we can fast clear */
   return true;
}

/**
 * Tell whether a clear color has a different bit representation when seen
 * through the view than when seen through the underlying surface.
 *
 * Returns false right away when the formats match and the view swizzle is
 * the identity.  Otherwise the color is packed for the surface format, the
 * inverse-swizzled color is packed for the view format, and the result is
 * true iff the two packed values differ.
 */
static bool
isl_color_value_requires_conversion(union isl_color_value color,
                                    const struct isl_surf *surf,
                                    const struct isl_view *view)
{
   if (surf->format == view->format && isl_swizzle_is_identity(view->swizzle))
      return false;

   uint32_t surf_pack[4] = { 0, 0, 0, 0 };
   isl_color_value_pack(&color, surf->format, surf_pack);

   uint32_t view_pack[4] = { 0, 0, 0, 0 };
   union isl_color_value swiz_color =
      isl_color_value_swizzle_inv(color, view->swizzle);
   isl_color_value_pack(&swiz_color, view->format, view_pack);

   return memcmp(surf_pack, view_pack, sizeof(surf_pack)) != 0;
}

/**
 * Decide whether a color clear of the given view can be done as a fast
 * clear.
 *
 * @param device       Device (hardware info and workarounds are read from it).
 * @param iview        Color view being cleared; only plane 0 is inspected.
 * @param layout       Image layout used to determine the fast clear type.
 * @param clear_color  Requested clear color.
 * @param num_layers   Number of layers being rendered; values above 1 only
 *                     trigger a performance warning.
 * @param render_area  Area being cleared.
 * @param queue_flags  Queue flags forwarded to the fast-clear-type query.
 *
 * @return true if the clear can be a fast clear, false otherwise.
 */
bool
anv_can_fast_clear_color_view(struct anv_device *device,
                              struct anv_image_view *iview,
                              VkImageLayout layout,
                              union isl_color_value clear_color,
                              uint32_t num_layers,
                              VkRect2D render_area,
                              const VkQueueFlagBits queue_flags)
{
   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
      return false;

   /* The view's first layer must be within the layers that have aux data at
    * the view's base level.
    */
   if (iview->planes[0].isl.base_array_layer >=
       anv_image_aux_layers(iview->image, VK_IMAGE_ASPECT_COLOR_BIT,
                            iview->planes[0].isl.base_level))
      return false;

   /* Start by getting the fast clear type.  We use the first subpass
    * layout here because we don't want to fast-clear if the first subpass
    * to use the attachment can't handle fast-clears.
    */
   enum anv_fast_clear_type fast_clear_type =
      anv_layout_to_fast_clear_type(device->info, iview->image,
                                    VK_IMAGE_ASPECT_COLOR_BIT,
                                    layout, queue_flags);
   switch (fast_clear_type) {
   case ANV_FAST_CLEAR_NONE:
      return false;
   case ANV_FAST_CLEAR_DEFAULT_VALUE:
      /* Only an all-zero color is accepted in this mode. */
      if (!isl_color_value_is_zero(clear_color, iview->planes[0].isl.format))
         return false;
      break;
   case ANV_FAST_CLEAR_ANY:
      break;
   }

   /* Potentially, we could do partial fast-clears but doing so has crazy
    * alignment restrictions.  It's easier to just restrict to full size
    * fast clears for now.
    */
   if (render_area.offset.x != 0 ||
       render_area.offset.y != 0 ||
       render_area.extent.width != iview->vk.extent.width ||
       render_area.extent.height != iview->vk.extent.height)
      return false;

   /* If the clear color is one that would require non-trivial format
    * conversion on resolve, we don't bother with the fast clear.  This
    * shouldn't be common as most clear colors are 0/1 and the most common
    * format re-interpretation is for sRGB.
    */
   if (isl_color_value_requires_conversion(clear_color,
                                           &iview->image->planes[0].primary_surface.isl,
                                           &iview->planes[0].isl)) {
      anv_perf_warn(VK_LOG_OBJS(&iview->vk.base),
                    "Cannot fast-clear to colors which would require "
                    "format conversion on resolve");
      return false;
   }

   /* We only allow fast clears to the first slice of an image (level 0,
    * layer 0) and only for the entire slice.  This guarantees us that, at
    * any given time, there is only one clear color on any given image.
    * At the time of our testing (Jan 17, 2018), there were no known
    * applications which would benefit from fast-clearing more than just
    * the first slice.
    */
   if (iview->planes[0].isl.base_level > 0 ||
       iview->planes[0].isl.base_array_layer > 0) {
      anv_perf_warn(VK_LOG_OBJS(&iview->image->vk.base),
                    "Rendering with multi-lod or multi-layer framebuffer "
                    "with LOAD_OP_LOAD and baseMipLevel > 0 or "
                    "baseArrayLayer > 0. Not fast clearing.");
      return false;
   }

   /* Multi-layer rendering does not prevent the fast clear; it is only
    * reported.
    */
   if (num_layers > 1) {
      anv_perf_warn(VK_LOG_OBJS(&iview->image->vk.base),
                    "Rendering to a multi-layer framebuffer with "
                    "LOAD_OP_CLEAR. Only fast-clearing the first slice");
   }

   /* Wa_18020603990 - slow clear surfaces up to 256x256, 32bpp. */
   if (intel_needs_workaround(device->info, 18020603990)) {
      const struct anv_surface *anv_surf =
         &iview->image->planes->primary_surface;
      if (isl_format_get_layout(anv_surf->isl.format)->bpb <= 32 &&
          anv_surf->isl.logical_level0_px.w <= 256 &&
          anv_surf->isl.logical_level0_px.h <= 256)
         return false;
   }

   /* On gfx12.0, CCS fast clears don't seem to cover the correct portion of
    * the aux buffer when the pitch is not 512B-aligned.
    */
   if (device->info->verx10 == 120 &&
       iview->image->planes->primary_surface.isl.samples == 1 &&
       iview->image->planes->primary_surface.isl.row_pitch_B % 512) {
      anv_perf_warn(VK_LOG_OBJS(&iview->image->vk.base),
                    "Pitch not 512B-aligned. Slow clearing surface.");
      return false;
   }

   /* Disable sRGB fast-clears for non-0/1 color values on Gfx9. For texturing
    * and draw calls, HW expects the clear color to be in two different color
    * spaces after sRGB fast-clears - sRGB in the former and linear in the
    * latter. By limiting the allowable values to 0/1, both color space
    * requirements are satisfied.
    *
    * Gfx11+ is fine as the fast clear generate 2 colors at the clear color
    * address, raw & converted such that all fixed functions can find the
    * value they need.
    */
   if (device->info->ver == 9 &&
       isl_format_is_srgb(iview->planes[0].isl.format) &&
       !isl_color_value_is_zero_one(clear_color,
                                    iview->planes[0].isl.format))
      return false;

   /* Wa_16021232440: Disable fast clear when height is 16k */
   if (intel_needs_workaround(device->info, 16021232440) &&
       iview->vk.extent.height == 16 * 1024) {
      return false;
   }

   return true;
}

/**
 * Initialize an image view and fill the surface states it needs.
 *
 * For every aspect of the view, this sets up the ISL view of the matching
 * view plane and then, depending on the view usage, fills:
 *  - the optimal and general sampler states (sampled / input attachment),
 *  - the storage state (storage usage).
 *
 * @param device                Owning device.
 * @param iview                 View to initialize.
 * @param pCreateInfo           Vulkan creation parameters.
 * @param surface_state_stream  State stream handed to the surface state
 *                              allocator; may be NULL.  Whether it was
 *                              non-NULL is recorded in
 *                              iview->use_surface_state_stream, which
 *                              anv_image_view_finish() consults.
 */
void
anv_image_view_init(struct anv_device *device,
                    struct anv_image_view *iview,
                    const VkImageViewCreateInfo *pCreateInfo,
                    struct anv_state_stream *surface_state_stream)
{
   ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);

   vk_image_view_init(&device->vk, &iview->vk, false, pCreateInfo);
   iview->image = image;
   iview->n_planes = anv_image_aspect_get_planes(iview->vk.aspects);
   iview->use_surface_state_stream = surface_state_stream != NULL;

   /* Now go through the underlying image selected planes and map them to
    * planes in the image view.
    */
   anv_foreach_image_aspect_bit(iaspect_bit, image, iview->vk.aspects) {
      const uint32_t vplane =
         anv_aspect_to_plane(iview->vk.aspects, 1UL << iaspect_bit);

      /* Emulated view formats are replaced by their emulation format, which
       * requires the image to have an emulation plane.
       */
      VkFormat view_format = iview->vk.view_format;
      if (anv_is_format_emulated(device->physical, view_format)) {
         assert(image->emu_plane_format != VK_FORMAT_UNDEFINED);
         view_format =
            anv_get_emulation_format(device->physical, view_format);
      }
      const struct anv_format_plane format = anv_get_format_plane(
            device->info, view_format, vplane, image->vk.tiling);

      iview->planes[vplane].isl = (struct isl_view) {
         .format = format.isl_format,
         .base_level = iview->vk.base_mip_level,
         .levels = iview->vk.level_count,
         .base_array_layer = iview->vk.base_array_layer,
         .array_len = iview->vk.layer_count,
         .min_lod_clamp = iview->vk.min_lod,
         .swizzle = {
            .r = remap_swizzle(iview->vk.swizzle.r, format.swizzle),
            .g = remap_swizzle(iview->vk.swizzle.g, format.swizzle),
            .b = remap_swizzle(iview->vk.swizzle.b, format.swizzle),
            .a = remap_swizzle(iview->vk.swizzle.a, format.swizzle),
         },
      };

      /* 3D views cover the whole depth of the view: layers start at 0 and
       * the array length is the view's depth.
       */
      if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) {
         iview->planes[vplane].isl.base_array_layer = 0;
         iview->planes[vplane].isl.array_len = iview->vk.extent.depth;
      }

      if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
          pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         iview->planes[vplane].isl.usage = ISL_SURF_USAGE_CUBE_BIT;
      } else {
         iview->planes[vplane].isl.usage = 0;
      }

      if (iview->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                             VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
         iview->planes[vplane].optimal_sampler.state =
            anv_device_maybe_alloc_surface_state(device, surface_state_stream);
         iview->planes[vplane].general_sampler.state =
            anv_device_maybe_alloc_surface_state(device, surface_state_stream);

         /* Two sampler states are built: one with the aux usage of the
          * GENERAL layout and one with that of SHADER_READ_ONLY_OPTIMAL.
          */
         enum isl_aux_usage general_aux_usage =
            anv_layout_to_aux_usage(device->info, image, 1UL << iaspect_bit,
                                    VK_IMAGE_USAGE_SAMPLED_BIT,
                                    VK_IMAGE_LAYOUT_GENERAL,
                                    VK_QUEUE_GRAPHICS_BIT |
                                    VK_QUEUE_COMPUTE_BIT |
                                    VK_QUEUE_TRANSFER_BIT);
         enum isl_aux_usage optimal_aux_usage =
            anv_layout_to_aux_usage(device->info, image, 1UL << iaspect_bit,
                                    VK_IMAGE_USAGE_SAMPLED_BIT,
                                    VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                    VK_QUEUE_GRAPHICS_BIT |
                                    VK_QUEUE_COMPUTE_BIT |
                                    VK_QUEUE_TRANSFER_BIT);

         anv_image_fill_surface_state(device, image, 1ULL << iaspect_bit,
                                      &iview->planes[vplane].isl,
                                      ISL_SURF_USAGE_TEXTURE_BIT,
                                      optimal_aux_usage, NULL,
                                      ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL,
                                      &iview->planes[vplane].optimal_sampler);

         anv_image_fill_surface_state(device, image, 1ULL << iaspect_bit,
                                      &iview->planes[vplane].isl,
                                      ISL_SURF_USAGE_TEXTURE_BIT,
                                      general_aux_usage, NULL,
                                      0,
                                      &iview->planes[vplane].general_sampler);
      }

      /* NOTE: This one needs to go last since it may stomp isl_view.format */
      if (iview->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
         /* Storage uses its own copy of the view; for 3D views the layer
          * range is taken from the storage Z-slice range instead.
          */
         struct isl_view storage_view = iview->planes[vplane].isl;
         if (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D) {
            storage_view.base_array_layer = iview->vk.storage.z_slice_offset;
            storage_view.array_len = iview->vk.storage.z_slice_count;
         }

         enum isl_aux_usage general_aux_usage =
            anv_layout_to_aux_usage(device->info, image, 1UL << iaspect_bit,
                                    VK_IMAGE_USAGE_STORAGE_BIT,
                                    VK_IMAGE_LAYOUT_GENERAL,
                                    VK_QUEUE_GRAPHICS_BIT |
                                    VK_QUEUE_COMPUTE_BIT |
                                    VK_QUEUE_TRANSFER_BIT);
         iview->planes[vplane].storage.state =
            anv_device_maybe_alloc_surface_state(device, surface_state_stream);

         anv_image_fill_surface_state(device, image, 1ULL << iaspect_bit,
                                      &storage_view,
                                      ISL_SURF_USAGE_STORAGE_BIT,
                                      general_aux_usage, NULL,
                                      0,
                                      &iview->planes[vplane].storage);
      }
   }
}

/**
 * Release what anv_image_view_init() set up.
 *
 * When the view was not created with a surface state stream, every
 * per-plane surface state with a non-zero alloc_size is returned to the
 * device's bindless surface state pool.  Stream-backed views skip this
 * step.  The common vk_image_view part is finished in both cases.
 */
void
anv_image_view_finish(struct anv_image_view *iview)
{
   struct anv_device *device =
      container_of(iview->vk.base.device, struct anv_device, vk);

   if (!iview->use_surface_state_stream) {
      for (uint32_t plane = 0; plane < iview->n_planes; plane++) {
         if (iview->planes[plane].optimal_sampler.state.alloc_size) {
            anv_state_pool_free(&device->bindless_surface_state_pool,
                                iview->planes[plane].optimal_sampler.state);
         }

         if (iview->planes[plane].general_sampler.state.alloc_size) {
            anv_state_pool_free(&device->bindless_surface_state_pool,
                                iview->planes[plane].general_sampler.state);
         }

         if (iview->planes[plane].storage.state.alloc_size) {
            anv_state_pool_free(&device->bindless_surface_state_pool,
                                iview->planes[plane].storage.state);
         }
      }
   }

   vk_image_view_finish(&iview->vk);
}

/**
 * vkCreateImageView entry point.
 *
 * Allocates a zero-initialized anv_image_view, initializes it without a
 * surface state stream and returns its handle in *pView.
 *
 * @return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the allocation
 *         fails.
 */
VkResult
anv_CreateImageView(VkDevice _device,
                    const VkImageViewCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkImageView *pView)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_image_view *iview;

   iview = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*iview), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (iview == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_image_view_init(device, iview, pCreateInfo, NULL);

   *pView = anv_image_view_to_handle(iview);

   return VK_SUCCESS;
}

/**
 * vkDestroyImageView entry point.
 *
 * Does nothing for a null handle; otherwise finishes the view and frees its
 * memory.
 */
void
anv_DestroyImageView(VkDevice _device, VkImageView _iview,
                     const VkAllocationCallbacks *pAllocator)
{
   ANV_FROM_HANDLE(anv_image_view, iview, _iview);

   if (!iview)
      return;

   anv_image_view_finish(iview);
   vk_free2(&iview->vk.base.device->alloc, pAllocator, iview);
}