/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_format.h"
#include "nvk_image.h"
#include "nvk_image_view.h"
#include "nvk_physical_device.h"

#include "vk_format.h"

#include "nvtypes.h"
#include "nv_push_cl902d.h"
#include "nv_push_cl90b5.h"
#include "nv_push_clc1b5.h"

/* Returns the hardware class of the DMA copy engine for this device. */
static inline uint16_t
nvk_cmd_buffer_copy_cls(struct nvk_cmd_buffer *cmd)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   return pdev->info.cls_copy;
}

/* Description of one side (source or destination) of a rectangle copy.
 *
 * This can describe either a linear buffer (presented to the copy engine as
 * a 2D pitch image) or an actual image plane at a particular mip level.
 */
struct nouveau_copy_buffer {
   uint64_t base_addr;
   VkImageType image_type;
   struct nil_Offset4D_Elements offset_el;
   struct nil_Extent4D_Elements extent_el;
   uint32_t bpp;
   uint32_t row_stride;
   uint32_t array_stride;
   struct nil_tiling tiling;
};

/* A single rectangle copy, including an optional component remap.
 *
 * When remap.comp_size == 0, remapping is disabled and the copy is done in
 * units of bytes.  When remap.comp_size > 0, the copy is done in units of
 * comp_size-byte components and remap.dst[] gives the 90B5 destination
 * component selectors.
 */
struct nouveau_copy {
   struct nouveau_copy_buffer src;
   struct nouveau_copy_buffer dst;
   struct nouveau_copy_remap {
      uint8_t comp_size;
      uint8_t dst[4];
   } remap;
   struct nil_Extent4D_Elements extent_el;
};

/* Builds a nouveau_copy_buffer describing a linear VkBuffer region laid out
 * according to @buffer_layout (element size, row stride, image stride).
 */
static struct nouveau_copy_buffer
nouveau_copy_rect_buffer(struct nvk_buffer *buf,
                         VkDeviceSize offset,
                         struct vk_image_buffer_layout buffer_layout)
{
   return (struct nouveau_copy_buffer) {
      .base_addr = nvk_buffer_address(buf, offset),
      .image_type = VK_IMAGE_TYPE_2D,
      .bpp = buffer_layout.element_size_B,
      .row_stride = buffer_layout.row_stride_B,
      .array_stride = buffer_layout.image_stride_B,
   };
}

/* Converts a VkOffset3D plus a base array layer to a NIL 4D pixel offset. */
static struct nil_Offset4D_Pixels
vk_to_nil_offset(VkOffset3D offset, uint32_t base_array_layer)
{
   return (struct nil_Offset4D_Pixels) {
      .x = offset.x,
      .y = offset.y,
      .z = offset.z,
      .a = base_array_layer,
   };
}

/* Converts a VkExtent3D plus an array layer count to a NIL 4D pixel extent. */
static struct nil_Extent4D_Pixels
vk_to_nil_extent(VkExtent3D extent, uint32_t array_layers)
{
   return (struct nil_Extent4D_Pixels) {
      .width = extent.width,
      .height = extent.height,
      .depth = extent.depth,
      .array_len = array_layers,
   };
}

/* Builds a nouveau_copy_buffer describing one plane of an image at the mip
 * level and base array layer selected by @sub_res, starting at @offset_px.
 * Offsets and extents are converted from pixels to elements (blocks) since
 * the copy engine works in elements.
 */
static struct nouveau_copy_buffer
nouveau_copy_rect_image(const struct nvk_image *img,
                        const struct nvk_image_plane *plane,
                        VkOffset3D offset_px,
                        const VkImageSubresourceLayers *sub_res)
{
   const struct nil_Extent4D_Pixels lvl_extent4d_px =
      nil_image_level_extent_px(&plane->nil, sub_res->mipLevel);

   offset_px = vk_image_sanitize_offset(&img->vk, offset_px);
   const struct nil_Offset4D_Pixels offset4d_px =
      vk_to_nil_offset(offset_px, sub_res->baseArrayLayer);

   struct nouveau_copy_buffer buf = {
      .base_addr = nvk_image_plane_base_address(plane) +
                   plane->nil.levels[sub_res->mipLevel].offset_B,
      .image_type = img->vk.image_type,
      .offset_el = nil_offset4d_px_to_el(offset4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .extent_el = nil_extent4d_px_to_el(lvl_extent4d_px, plane->nil.format,
                                         plane->nil.sample_layout),
      .bpp = util_format_get_blocksize(plane->nil.format.p_format),
      .row_stride = plane->nil.levels[sub_res->mipLevel].row_stride_B,
      .array_stride = plane->nil.array_stride_B,
      .tiling = plane->nil.levels[sub_res->mipLevel].tiling,
   };

   return buf;
}

/* Returns an identity component remap for @format.
 *
 * Pick an arbitrary component size.  It doesn't matter what size we pick
 * since we're just doing a copy, as long as it's no more than 4B and
 * divides the format size.
 */
static struct nouveau_copy_remap
nouveau_copy_remap_format(VkFormat format)
{
   unsigned comp_size = vk_format_get_blocksize(format);
   if (comp_size % 3 == 0) {
      /* 3-component formats (e.g. RGB): one component per channel */
      comp_size /= 3;
      assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 4);
   } else {
      assert(util_is_power_of_two_nonzero(comp_size) && comp_size <= 16);
      comp_size = MIN2(comp_size, 4);
   }

   return (struct nouveau_copy_remap) {
      .comp_size = comp_size,
      .dst = { 0, 1, 2, 3 },
   };
}

/* Encodes a component size in bytes (1..4) as a 90B5 COMPONENT_SIZE enum. */
static uint32_t
to_90b5_remap_comp_size(uint8_t comp_size)
{
   static const uint8_t to_90b5[] = {
      [1] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE,
      [2] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO,
      [3] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE,
      [4] = NV90B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR,
   };
   assert(comp_size > 0 && comp_size < ARRAY_SIZE(to_90b5));

   /* The hardware encoding is simply (size - 1); the table only backs up
    * the assert below.
    */
   uint32_t size_90b5 = comp_size - 1;
   assert(size_90b5 == to_90b5[comp_size]);
   return size_90b5;
}

/* Encodes a component count (1..4) as a 90B5 NUM_*_COMPONENTS enum. */
static uint32_t
to_90b5_remap_num_comps(uint8_t num_comps)
{
   static const uint8_t to_90b5[] = {
      [1] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE,
      [2] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO,
      [3] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE,
      [4] = NV90B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR,
   };
   assert(num_comps > 0 && num_comps < ARRAY_SIZE(to_90b5));

   /* The hardware encoding is simply (count - 1); the table only backs up
    * the assert below.
    */
   uint32_t num_comps_90b5 = num_comps - 1;
   assert(num_comps_90b5 == to_90b5[num_comps]);
   return num_comps_90b5;
}

/* Emits 90B5 DMA commands to execute the rectangle copy described by @copy.
 *
 * The copy engine only handles one 2D layer at a time, so 3D depth or array
 * layers are iterated in software; the extent may have depth > 1 or
 * array_len > 1, but never both.
 */
static void
nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
{
   uint32_t src_bw, dst_bw;
   if (copy->remap.comp_size > 0) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);

      assert(copy->src.bpp % copy->remap.comp_size == 0);
      assert(copy->dst.bpp % copy->remap.comp_size == 0);
      uint32_t num_src_comps = copy->src.bpp / copy->remap.comp_size;
      uint32_t num_dst_comps = copy->dst.bpp / copy->remap.comp_size;

      /* When running with component remapping enabled, most X/Y dimensions
       * are in units of blocks.
       */
      src_bw = dst_bw = 1;

      P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
         .dst_x = copy->remap.dst[0],
         .dst_y = copy->remap.dst[1],
         .dst_z = copy->remap.dst[2],
         .dst_w = copy->remap.dst[3],
         .component_size = to_90b5_remap_comp_size(copy->remap.comp_size),
         /* Fixed: these previously went through to_90b5_remap_comp_size(),
          * which happens to produce the same (n - 1) encoding but asserts
          * the wrong table and left to_90b5_remap_num_comps() unused.
          */
         .num_src_components = to_90b5_remap_num_comps(num_src_comps),
         .num_dst_components = to_90b5_remap_num_comps(num_dst_comps),
      });
   } else {
      /* When component remapping is disabled, dimensions are in units of
       * bytes (an implicit block width of 1B).
       */
      assert(copy->src.bpp == copy->dst.bpp);
      src_bw = copy->src.bpp;
      dst_bw = copy->dst.bpp;
   }

   assert(copy->extent_el.depth == 1 || copy->extent_el.array_len == 1);
   uint32_t layers = MAX2(copy->extent_el.depth, copy->extent_el.array_len);
   for (unsigned z = 0; z < layers; z++) {
      VkDeviceSize src_addr = copy->src.base_addr;
      VkDeviceSize dst_addr = copy->dst.base_addr;

      /* For 3D images the hardware handles the Z slice via SET_*_LAYER;
       * for everything else we step through array layers manually.
       */
      if (copy->src.image_type != VK_IMAGE_TYPE_3D)
         src_addr += (z + copy->src.offset_el.a) * copy->src.array_stride;

      if (copy->dst.image_type != VK_IMAGE_TYPE_3D)
         dst_addr += (z + copy->dst.offset_el.a) * copy->dst.array_stride;

      /* Pitch-linear surfaces have no ORIGIN methods; fold the X/Y offset
       * into the base address instead.
       */
      if (!copy->src.tiling.is_tiled) {
         src_addr += copy->src.offset_el.x * copy->src.bpp +
                     copy->src.offset_el.y * copy->src.row_stride;
      }

      if (!copy->dst.tiling.is_tiled) {
         dst_addr += copy->dst.offset_el.x * copy->dst.bpp +
                     copy->dst.offset_el.y * copy->dst.row_stride;
      }

      struct nv_push *p = nvk_cmd_buffer_push(cmd, 31);

      P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(p, copy->src.row_stride);
      P_NV90B5_PITCH_OUT(p, copy->dst.row_stride);
      P_NV90B5_LINE_LENGTH_IN(p, copy->extent_el.width * src_bw);
      P_NV90B5_LINE_COUNT(p, copy->extent_el.height);

      uint32_t src_layout = 0, dst_layout = 0;
      if (copy->src.tiling.is_tiled) {
         P_MTHD(p, NV90B5, SET_SRC_BLOCK_SIZE);
         P_NV90B5_SET_SRC_BLOCK_SIZE(p, {
            .width = 0, /* Tiles are always 1 GOB wide */
            .height = copy->src.tiling.y_log2,
            .depth = copy->src.tiling.z_log2,
            .gob_height = copy->src.tiling.gob_height_is_8 ?
                          GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
                          GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
         });
         /* We use the stride for copies because the copy hardware has no
          * concept of a tile width.  Instead, we just set the width to the
          * stride divided by bpp.
          */
         uint32_t src_stride_el = copy->src.row_stride / copy->src.bpp;
         P_NV90B5_SET_SRC_WIDTH(p, src_stride_el * src_bw);
         P_NV90B5_SET_SRC_HEIGHT(p, copy->src.extent_el.height);
         P_NV90B5_SET_SRC_DEPTH(p, copy->src.extent_el.depth);
         if (copy->src.image_type == VK_IMAGE_TYPE_3D)
            P_NV90B5_SET_SRC_LAYER(p, z + copy->src.offset_el.z);
         else
            P_NV90B5_SET_SRC_LAYER(p, 0);

         if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
            /* Pascal B and later take separate 32-bit X/Y origin methods */
            P_MTHD(p, NVC1B5, SRC_ORIGIN_X);
            P_NVC1B5_SRC_ORIGIN_X(p, copy->src.offset_el.x * src_bw);
            P_NVC1B5_SRC_ORIGIN_Y(p, copy->src.offset_el.y);
         } else {
            P_MTHD(p, NV90B5, SET_SRC_ORIGIN);
            P_NV90B5_SET_SRC_ORIGIN(p, {
               .x = copy->src.offset_el.x * src_bw,
               .y = copy->src.offset_el.y,
            });
         }

         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
      } else {
         /* NOTE: a dead "src_addr += copy->src.array_stride" store was
          * dropped here; src_addr was already pushed above and is not
          * read again this iteration.
          */
         src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
      }

      if (copy->dst.tiling.is_tiled) {
         P_MTHD(p, NV90B5, SET_DST_BLOCK_SIZE);
         P_NV90B5_SET_DST_BLOCK_SIZE(p, {
            .width = 0, /* Tiles are always 1 GOB wide */
            .height = copy->dst.tiling.y_log2,
            .depth = copy->dst.tiling.z_log2,
            .gob_height = copy->dst.tiling.gob_height_is_8 ?
                          GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
                          GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
         });
         /* We use the stride for copies because the copy hardware has no
          * concept of a tile width.  Instead, we just set the width to the
          * stride divided by bpp.
          */
         uint32_t dst_stride_el = copy->dst.row_stride / copy->dst.bpp;
         P_NV90B5_SET_DST_WIDTH(p, dst_stride_el * dst_bw);
         P_NV90B5_SET_DST_HEIGHT(p, copy->dst.extent_el.height);
         P_NV90B5_SET_DST_DEPTH(p, copy->dst.extent_el.depth);
         if (copy->dst.image_type == VK_IMAGE_TYPE_3D)
            P_NV90B5_SET_DST_LAYER(p, z + copy->dst.offset_el.z);
         else
            P_NV90B5_SET_DST_LAYER(p, 0);

         if (nvk_cmd_buffer_copy_cls(cmd) >= PASCAL_DMA_COPY_B) {
            P_MTHD(p, NVC1B5, DST_ORIGIN_X);
            P_NVC1B5_DST_ORIGIN_X(p, copy->dst.offset_el.x * dst_bw);
            P_NVC1B5_DST_ORIGIN_Y(p, copy->dst.offset_el.y);
         } else {
            P_MTHD(p, NV90B5, SET_DST_ORIGIN);
            P_NV90B5_SET_DST_ORIGIN(p, {
               .x = copy->dst.offset_el.x * dst_bw,
               .y = copy->dst.offset_el.y,
            });
         }

         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
      } else {
         /* NOTE: a dead "dst_addr += copy->dst.array_stride" store was
          * dropped here; dst_addr was already pushed above and is not
          * read again this iteration.
          */
         dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
      }

      P_IMMD(p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = src_layout,
         .dst_memory_layout = dst_layout,
         .remap_enable = copy->remap.comp_size > 0,
      });
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                   const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r];

      uint64_t src_addr = nvk_buffer_address(src, region->srcOffset);
      uint64_t dst_addr = nvk_buffer_address(dst, region->dstOffset);
      uint64_t size = region->size;

      /* Split each region into chunks the copy engine can take as a
       * single line.
       */
      while (size) {
         struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

         P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
         P_NV90B5_OFFSET_IN_UPPER(p, src_addr >> 32);
         P_NV90B5_OFFSET_IN_LOWER(p, src_addr & 0xffffffff);
         P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
         P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

         unsigned bytes = MIN2(size, 1 << 17);

         P_MTHD(p, NV90B5, LINE_LENGTH_IN);
         P_NV90B5_LINE_LENGTH_IN(p, bytes);
         P_NV90B5_LINE_COUNT(p, 1);

         P_IMMD(p, NV90B5, LAUNCH_DMA, {
            .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
            .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
            .flush_enable = FLUSH_ENABLE_TRUE,
            .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
            .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         });

         src_addr += bytes;
         dst_addr += bytes;
         size -= bytes;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
                          const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(nvk_image, dst, pCopyBufferToImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&dst->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&dst->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&dst->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(dst, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_buffer(src, region->bufferOffset,
                                         buffer_layout),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            dst->planes[plane].nil.format,
                                            dst->planes[plane].nil.sample_layout),
      };
      /* Second copy, only used when the stencil of a packed depth/stencil
       * format has to bounce through a temporary image.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (dst->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Buffer -> stencil temp, then temp -> the stencil word of the
             * packed D32S8 image.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(dst, &dst->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 2;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Write the low three bytes (depth), leave stencil alone */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Write only the top byte (stencil) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_X;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(dst->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyBufferToImageInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
                          const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageToBufferInfo->srcImage);
   VK_FROM_HANDLE(nvk_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
      struct vk_image_buffer_layout buffer_layout =
         vk_image_buffer_copy_layout(&src->vk, region);

      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->imageExtent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->imageSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits aspects = region->imageSubresource.aspectMask;
      uint8_t plane = nvk_image_aspects_to_plane(src, aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[plane],
                                        region->imageOffset,
                                        &region->imageSubresource),
         .dst = nouveau_copy_rect_buffer(dst, region->bufferOffset,
                                         buffer_layout),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            src->planes[plane].nil.format,
                                            src->planes[plane].nil.sample_layout),
      };
      /* Second copy, only used when the stencil of a packed depth/stencil
       * format has to bounce through a temporary image.
       */
      struct nouveau_copy copy2 = { 0 };

      switch (src->vk.format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            copy.remap.comp_size = 4;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Stencil word of the packed D32S8 image -> stencil temp,
             * then temp -> buffer.
             */
            copy2.dst = copy.dst;
            copy2.extent_el = copy.extent_el;
            copy.dst = copy2.src =
               nouveau_copy_rect_image(src, &src->stencil_copy_temp,
                                       region->imageOffset,
                                       &region->imageSubresource);

            copy.remap.comp_size = 2;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;

            copy2.remap.comp_size = 1;
            copy2.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy2.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy2.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy2.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Read the low three bytes (depth) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else {
            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
            /* Read only the top byte (stencil) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_W;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
      if (copy2.extent_el.width > 0)
         nouveau_copy_rect(cmd, &copy2);

      vk_foreach_struct_const(ext, region->pNext) {
         switch (ext->sType) {
         default:
            vk_debug_ignored_stype(ext->sType);
            break;
         }
      }
   }

   vk_foreach_struct_const(ext, pCopyImageToBufferInfo->pNext) {
      switch (ext->sType) {
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

/* Copies @copy_rect of @iview between a linear image plane and its tiled
 * shadow, in the direction selected by @copy_to_tiled_shadow.
 */
void
nvk_linear_render_copy(struct nvk_cmd_buffer *cmd,
                       const struct nvk_image_view *iview,
                       VkRect2D copy_rect,
                       bool copy_to_tiled_shadow)
{
   const struct nvk_image *image = (struct nvk_image *)iview->vk.image;

   const uint8_t ip = iview->planes[0].image_plane;
   const struct nvk_image_plane *src_plane = NULL, *dst_plane = NULL;
   if (copy_to_tiled_shadow) {
      src_plane = &image->planes[ip];
      dst_plane = &image->linear_tiled_shadow;
   } else {
      src_plane = &image->linear_tiled_shadow;
      dst_plane = &image->planes[ip];
   }

   const struct VkImageSubresourceLayers subres = {
      .aspectMask = iview->vk.aspects,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
      .mipLevel = iview->vk.base_mip_level,
   };

   const VkOffset3D offset_px = {
      .x = copy_rect.offset.x,
      .y = copy_rect.offset.y,
      .z = 0,
   };
   const struct nil_Extent4D_Pixels extent4d_px = {
      .width = copy_rect.extent.width,
      .height = copy_rect.extent.height,
      .depth = 1,
      .array_len = 1,
   };

   struct nouveau_copy copy = {
      .src = nouveau_copy_rect_image(image, src_plane, offset_px, &subres),
      .dst = nouveau_copy_rect_image(image, dst_plane, offset_px, &subres),
      .extent_el = nil_extent4d_px_to_el(extent4d_px, src_plane->nil.format,
                                         src_plane->nil.sample_layout),
   };
   copy.remap = nouveau_copy_remap_format(image->vk.format);

   nouveau_copy_rect(cmd, &copy);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyImage2(VkCommandBuffer commandBuffer,
                  const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_image, src, pCopyImageInfo->srcImage);
   VK_FROM_HANDLE(nvk_image, dst, pCopyImageInfo->dstImage);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r];

      /* From the Vulkan 1.3.217 spec:
       *
       *    "When copying between compressed and uncompressed formats the
       *    extent members represent the texel dimensions of the source image
       *    and not the destination."
       */
      const VkExtent3D extent_px =
         vk_image_sanitize_extent(&src->vk, region->extent);
      const uint32_t layer_count =
         vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
      const struct nil_Extent4D_Pixels extent4d_px =
         vk_to_nil_extent(extent_px, layer_count);

      const VkImageAspectFlagBits src_aspects =
         region->srcSubresource.aspectMask;
      uint8_t src_plane = nvk_image_aspects_to_plane(src, src_aspects);

      const VkImageAspectFlagBits dst_aspects =
         region->dstSubresource.aspectMask;
      uint8_t dst_plane = nvk_image_aspects_to_plane(dst, dst_aspects);

      struct nouveau_copy copy = {
         .src = nouveau_copy_rect_image(src, &src->planes[src_plane],
                                        region->srcOffset,
                                        &region->srcSubresource),
         .dst = nouveau_copy_rect_image(dst, &dst->planes[dst_plane],
                                        region->dstOffset,
                                        &region->dstSubresource),
         .extent_el = nil_extent4d_px_to_el(extent4d_px,
                                            src->planes[src_plane].nil.format,
                                            src->planes[src_plane].nil.sample_layout),
      };

      assert(src_aspects == region->srcSubresource.aspectMask);
      switch (src->vk.format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (src_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
            /* Copy the low three bytes (depth), leave stencil alone.
             * Fixed: dst[0] previously used DST_W_SRC_X, an enum from the
             * W slot; the X slot selector is DST_X_SRC_X (matches the
             * identical D24S8 depth remaps in the buffer<->image paths).
             */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_SRC_X;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE;
         } else if (src_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) {
            /* Copy only the top byte (stencil) */
            copy.remap.comp_size = 1;
            copy.remap.dst[0] = NV90B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE;
            copy.remap.dst[1] = NV90B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE;
            copy.remap.dst[2] = NV90B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE;
            copy.remap.dst[3] = NV90B5_SET_REMAP_COMPONENTS_DST_W_SRC_W;
         } else {
            /* If we're copying both, there's nothing special to do */
            assert(src_aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT));
         }
         break;
      default:
         copy.remap = nouveau_copy_remap_format(src->vk.format);
         break;
      }

      nouveau_copy_rect(cmd, &copy);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
                  VkBuffer dstBuffer,
                  VkDeviceSize dstOffset,
                  VkDeviceSize size,
                  uint32_t data)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
   size = vk_buffer_range(&dst_buffer->vk, dstOffset, size);

   /* Maximum line length/count used per DMA; the fill is expressed as a
    * 2D pitch copy whose source is the remap constant.
    */
   uint32_t max_dim = 1 << 15;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 7);

   P_IMMD(p, NV90B5, SET_REMAP_CONST_A, data);
   P_IMMD(p, NV90B5, SET_REMAP_COMPONENTS, {
      .dst_x = DST_X_CONST_A,
      .dst_y = DST_Y_CONST_A,
      .dst_z = DST_Z_CONST_A,
      .dst_w = DST_W_CONST_A,
      .component_size = COMPONENT_SIZE_FOUR,
      .num_src_components = NUM_SRC_COMPONENTS_ONE,
      .num_dst_components = NUM_DST_COMPONENTS_ONE,
   });

   P_MTHD(p, NV90B5, PITCH_IN);
   P_NV90B5_PITCH_IN(p, max_dim * 4);
   P_NV90B5_PITCH_OUT(p, max_dim * 4);

   while (size >= 4) {
      struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

      P_MTHD(p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

      /* Carve the remaining range into the largest width x height
       * rectangle of dwords that fits in one launch.
       */
      uint64_t width, height;
      if (size >= (uint64_t)max_dim * (uint64_t)max_dim * 4) {
         width = height = max_dim;
      } else if (size >= max_dim * 4) {
         width = max_dim;
         height = size / (max_dim * 4);
      } else {
         width = size / 4;
         height = 1;
      }
      uint64_t dma_size = (uint64_t)width * (uint64_t)height * 4;
      assert(dma_size <= size);

      P_MTHD(p, NV90B5, LINE_LENGTH_IN);
      P_NV90B5_LINE_LENGTH_IN(p, width);
      P_NV90B5_LINE_COUNT(p, height);

      P_IMMD(p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      dst_addr += dma_size;
      size -= dma_size;
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                    VkBuffer dstBuffer,
                    VkDeviceSize dstOffset,
                    VkDeviceSize dataSize,
                    const void *pData)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_buffer, dst, dstBuffer);

   uint64_t dst_addr = nvk_buffer_address(dst, dstOffset);

   /* Stage the data in the command buffer's upload area, then DMA it to
    * the destination buffer.
    */
   uint64_t data_addr;
   nvk_cmd_buffer_upload_data(cmd, pData, dataSize, 64, &data_addr);

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   P_MTHD(p, NV90B5, OFFSET_IN_UPPER);
   P_NV90B5_OFFSET_IN_UPPER(p, data_addr >> 32);
   P_NV90B5_OFFSET_IN_LOWER(p, data_addr & 0xffffffff);
   P_NV90B5_OFFSET_OUT_UPPER(p, dst_addr >> 32);
   P_NV90B5_OFFSET_OUT_LOWER(p, dst_addr & 0xffffffff);

   P_MTHD(p, NV90B5, LINE_LENGTH_IN);
   P_NV90B5_LINE_LENGTH_IN(p, dataSize);
   P_NV90B5_LINE_COUNT(p, 1);

   P_IMMD(p, NV90B5, LAUNCH_DMA, {
      .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
      .multi_line_enable = MULTI_LINE_ENABLE_TRUE,
      .flush_enable = FLUSH_ENABLE_TRUE,
      .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
      .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
   });
}