/* * Copyright (C) 2019-2022 Collabora, Ltd. * Copyright (C) 2018-2019 Alyssa Rosenzweig * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "util/log.h" #include "util/macros.h" #include "util/u_math.h" #include "pan_texture.h" /* * List of supported modifiers, in descending order of preference. AFBC is * faster than u-interleaved tiling which is faster than linear. Within AFBC, * enabling the YUV-like transform is typically a win where possible. * AFRC is only used if explicitely asked for (only for RGB formats). */ uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = { DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC | AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_TILED | AFBC_FORMAT_MOD_SC | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE | AFBC_FORMAT_MOD_YTR), DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED, DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_16)), DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_24)), DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_32)), DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_16) | AFRC_FORMAT_MOD_LAYOUT_SCAN), DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_24) | AFRC_FORMAT_MOD_LAYOUT_SCAN), DRM_FORMAT_MOD_ARM_AFRC( AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_32) | AFRC_FORMAT_MOD_LAYOUT_SCAN), }; /* Table of AFBC superblock sizes */ static const struct pan_block_size afbc_superblock_sizes[] = { [AFBC_FORMAT_MOD_BLOCK_SIZE_16x16] = {16, 16}, [AFBC_FORMAT_MOD_BLOCK_SIZE_32x8] = {32, 8}, [AFBC_FORMAT_MOD_BLOCK_SIZE_64x4] = {64, 4}, }; /* * Given an AFBC modifier, return the superblock size. * * We do not yet have any use cases for multiplanar YCBCr formats with different * superblock sizes on the luma and chroma planes. These formats are unsupported * for now. */ struct pan_block_size panfrost_afbc_superblock_size(uint64_t modifier) { unsigned index = (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK); assert(drm_is_afbc(modifier)); assert(index < ARRAY_SIZE(afbc_superblock_sizes)); return afbc_superblock_sizes[index]; } /* * Given an AFBC modifier, return the width of the superblock. */ unsigned panfrost_afbc_superblock_width(uint64_t modifier) { return panfrost_afbc_superblock_size(modifier).width; } /* * Given an AFBC modifier, return the height of the superblock. */ unsigned panfrost_afbc_superblock_height(uint64_t modifier) { return panfrost_afbc_superblock_size(modifier).height; } /* * Given an AFBC modifier, return if "wide blocks" are used. Wide blocks are * defined as superblocks wider than 16 pixels, the minimum (and default) super * block width. */ bool panfrost_afbc_is_wide(uint64_t modifier) { return panfrost_afbc_superblock_width(modifier) > 16; } /* * Given an AFBC modifier, return the subblock size (subdivision of a * superblock). This is always 4x4 for now as we only support one AFBC * superblock layout. */ struct pan_block_size panfrost_afbc_subblock_size(uint64_t modifier) { return (struct pan_block_size){4, 4}; } /* * Given an AFRC modifier, return whether the layout is optimized for scan * order (vs rotation order). */ bool panfrost_afrc_is_scan(uint64_t modifier) { return modifier & AFRC_FORMAT_MOD_LAYOUT_SCAN; } struct pan_block_size panfrost_afrc_clump_size(enum pipe_format format, bool scan) { struct pan_afrc_format_info finfo = panfrost_afrc_get_format_info(format); switch (finfo.num_comps) { case 1: return scan ? (struct pan_block_size){16, 4} : (struct pan_block_size){8, 8}; case 2: return (struct pan_block_size){8, 4}; case 3: case 4: return (struct pan_block_size){4, 4}; default: assert(0); return (struct pan_block_size){0, 0}; } } static struct pan_block_size panfrost_afrc_layout_size(uint64_t modifier) { if (panfrost_afrc_is_scan(modifier)) return (struct pan_block_size){16, 4}; else return (struct pan_block_size){8, 8}; } struct pan_block_size panfrost_afrc_tile_size(enum pipe_format format, uint64_t modifier) { bool scan = panfrost_afrc_is_scan(modifier); struct pan_block_size clump_sz = panfrost_afrc_clump_size(format, scan); struct pan_block_size layout_sz = panfrost_afrc_layout_size(modifier); return (struct pan_block_size){clump_sz.width * layout_sz.width, clump_sz.height * layout_sz.height}; } unsigned panfrost_afrc_block_size_from_modifier(uint64_t modifier) { switch (modifier & AFRC_FORMAT_MOD_CU_SIZE_MASK) { case AFRC_FORMAT_MOD_CU_SIZE_16: return 16; case AFRC_FORMAT_MOD_CU_SIZE_24: return 24; case AFRC_FORMAT_MOD_CU_SIZE_32: return 32; default: unreachable("invalid coding unit size flag in modifier"); }; } static unsigned panfrost_afrc_buffer_alignment_from_modifier(uint64_t modifier) { switch (modifier & AFRC_FORMAT_MOD_CU_SIZE_MASK) { case AFRC_FORMAT_MOD_CU_SIZE_16: return 1024; case AFRC_FORMAT_MOD_CU_SIZE_24: return 512; case AFRC_FORMAT_MOD_CU_SIZE_32: return 2048; default: unreachable("invalid coding unit size flag in modifier"); }; } /* * Determine the number of bytes between rows of paging tiles in an AFRC image */ uint32_t pan_afrc_row_stride(enum pipe_format format, uint64_t modifier, uint32_t width) { struct pan_block_size tile_size = panfrost_afrc_tile_size(format, modifier); unsigned block_size = panfrost_afrc_block_size_from_modifier(modifier); return (width / tile_size.width) * block_size * AFRC_CLUMPS_PER_TILE; } /* * Given a format, determine the tile size used for u-interleaving. For formats * that are already block compressed, this is 4x4. For all other formats, this * is 16x16, hence the modifier name. */ static inline struct pan_block_size panfrost_u_interleaved_tile_size(enum pipe_format format) { if (util_format_is_compressed(format)) return (struct pan_block_size){4, 4}; else return (struct pan_block_size){16, 16}; } /* * Determine the block size used for interleaving. For u-interleaving, this is * the tile size. For AFBC, this is the superblock size. For AFRC, this is the * paging tile size. For linear textures, this is trivially 1x1. */ struct pan_block_size panfrost_block_size(uint64_t modifier, enum pipe_format format) { if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) return panfrost_u_interleaved_tile_size(format); else if (drm_is_afbc(modifier)) return panfrost_afbc_superblock_size(modifier); else if (drm_is_afrc(modifier)) return panfrost_afrc_tile_size(format, modifier); else return (struct pan_block_size){1, 1}; } /* * Determine the tile size used by AFBC. This tiles superblocks themselves. * Current GPUs support either 8x8 tiling or no tiling (1x1) */ static inline unsigned pan_afbc_tile_size(uint64_t modifier) { return (modifier & AFBC_FORMAT_MOD_TILED) ? 8 : 1; } /* * Determine the number of bytes between header rows for an AFBC image. For an * image with linear headers, this is simply the number of header blocks * (=superblocks) per row times the numbers of bytes per header block. For an * image with tiled headers, this is multipled by the number of rows of * header blocks are in a tile together. */ uint32_t pan_afbc_row_stride(uint64_t modifier, uint32_t width) { unsigned block_width = panfrost_afbc_superblock_width(modifier); return (width / block_width) * pan_afbc_tile_size(modifier) * AFBC_HEADER_BYTES_PER_TILE; } /* * Determine the number of header blocks between header rows. This is equal to * the number of bytes between header rows divided by the bytes per blocks of a * header tile. This is also divided by the tile size to give a "line stride" in * blocks, rather than a real row stride. This is required by Bifrost. */ uint32_t pan_afbc_stride_blocks(uint64_t modifier, uint32_t row_stride_bytes) { return row_stride_bytes / (AFBC_HEADER_BYTES_PER_TILE * pan_afbc_tile_size(modifier)); } /* * Determine the required alignment for the slice offset of an image. For * now, this is always aligned on 64-byte boundaries. */ uint32_t pan_slice_align(uint64_t modifier) { return 64; } /* * Determine the required alignment for the body offset of an AFBC image. For * now, this depends only on whether tiling is in use. These minimum alignments * are required on all current GPUs. */ uint32_t pan_afbc_body_align(uint64_t modifier) { return (modifier & AFBC_FORMAT_MOD_TILED) ? 4096 : 64; } static inline unsigned format_minimum_alignment(unsigned arch, enum pipe_format format, uint64_t mod) { if (drm_is_afbc(mod)) return 16; if (drm_is_afrc(mod)) return panfrost_afrc_buffer_alignment_from_modifier(mod); if (arch < 7) return 64; switch (format) { /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes */ case PIPE_FORMAT_R8_G8B8_420_UNORM: case PIPE_FORMAT_G8_B8R8_420_UNORM: case PIPE_FORMAT_R8_G8_B8_420_UNORM: case PIPE_FORMAT_R8_B8_G8_420_UNORM: return 16; default: return 64; } } /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. * Checksumming is believed to be a CRC variant (CRC64 based on the size?). * This feature is also known as "transaction elimination". */ #define CHECKSUM_TILE_WIDTH 16 #define CHECKSUM_TILE_HEIGHT 16 #define CHECKSUM_BYTES_PER_TILE 8 unsigned panfrost_compute_checksum_size(struct pan_image_slice_layout *slice, unsigned width, unsigned height) { unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH); unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT); slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE; return slice->crc.stride * tile_count_y; } unsigned panfrost_get_layer_stride(const struct pan_image_layout *layout, unsigned level) { if (layout->dim != MALI_TEXTURE_DIMENSION_3D) return layout->array_stride; else if (drm_is_afbc(layout->modifier)) return layout->slices[level].afbc.surface_stride; else return layout->slices[level].surface_stride; } unsigned panfrost_get_legacy_stride(const struct pan_image_layout *layout, unsigned level) { unsigned row_stride = layout->slices[level].row_stride; struct pan_block_size block_size = panfrost_block_size(layout->modifier, layout->format); if (drm_is_afbc(layout->modifier)) { unsigned width = u_minify(layout->width, level); unsigned alignment = block_size.width * pan_afbc_tile_size(layout->modifier); width = ALIGN_POT(width, alignment); return width * util_format_get_blocksize(layout->format); } else if (drm_is_afrc(layout->modifier)) { struct pan_block_size tile_size = panfrost_afrc_tile_size(layout->format, layout->modifier); return row_stride / tile_size.height; } else { return row_stride / block_size.height; } } unsigned panfrost_from_legacy_stride(unsigned legacy_stride, enum pipe_format format, uint64_t modifier) { struct pan_block_size block_size = panfrost_block_size(modifier, format); if (drm_is_afbc(modifier)) { unsigned width = legacy_stride / util_format_get_blocksize(format); return pan_afbc_row_stride(modifier, width); } else if (drm_is_afrc(modifier)) { struct pan_block_size tile_size = panfrost_afrc_tile_size(format, modifier); return legacy_stride * tile_size.height; } else { return legacy_stride * block_size.height; } } /* Computes the offset into a texture at a particular level/face. Add to * the base address of a texture to get the address to that level/face */ unsigned panfrost_texture_offset(const struct pan_image_layout *layout, unsigned level, unsigned array_idx, unsigned surface_idx) { return layout->slices[level].offset + (array_idx * layout->array_stride) + (surface_idx * layout->slices[level].surface_stride); } bool pan_image_layout_init(unsigned arch, struct pan_image_layout *layout, const struct pan_image_explicit_layout *explicit_layout) { /* Explicit stride only work with non-mipmap, non-array, single-sample * 2D image without CRC. */ if (explicit_layout && (layout->depth > 1 || layout->nr_samples > 1 || layout->array_size > 1 || layout->dim != MALI_TEXTURE_DIMENSION_2D || layout->nr_slices > 1 || layout->crc)) return false; bool afbc = drm_is_afbc(layout->modifier); bool afrc = drm_is_afrc(layout->modifier); int align_req = format_minimum_alignment(arch, layout->format, layout->modifier); /* Mandate alignment */ if (explicit_layout) { bool rejected = false; int align_mask = align_req - 1; if (arch >= 7) { rejected = ((explicit_layout->offset & align_mask) || (explicit_layout->row_stride & align_mask)); } else { rejected = (explicit_layout->offset & align_mask); } if (rejected) { mesa_loge( "panfrost: rejecting image due to unsupported offset or stride " "alignment.\n"); return false; } } unsigned fmt_blocksize = util_format_get_blocksize(layout->format); /* MSAA is implemented as a 3D texture with z corresponding to the * sample #, horrifyingly enough */ assert(layout->depth == 1 || layout->nr_samples == 1); bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR; bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D; unsigned offset = explicit_layout ? explicit_layout->offset : 0; struct pan_block_size block_size = panfrost_block_size(layout->modifier, layout->format); unsigned width = layout->width; unsigned height = layout->height; unsigned depth = layout->depth; unsigned align_w = block_size.width; unsigned align_h = block_size.height; /* For tiled AFBC, align to tiles of superblocks (this can be large) */ if (afbc) { align_w *= pan_afbc_tile_size(layout->modifier); align_h *= pan_afbc_tile_size(layout->modifier); } for (unsigned l = 0; l < layout->nr_slices; ++l) { struct pan_image_slice_layout *slice = &layout->slices[l]; unsigned effective_width = ALIGN_POT(util_format_get_nblocksx(layout->format, width), align_w); unsigned effective_height = ALIGN_POT(util_format_get_nblocksy(layout->format, height), align_h); unsigned row_stride; /* Align levels to cache-line as a performance improvement for * linear/tiled and as a requirement for AFBC */ offset = ALIGN_POT(offset, pan_slice_align(layout->modifier)); slice->offset = offset; if (afrc) { row_stride = pan_afrc_row_stride(layout->format, layout->modifier, effective_width); } else { row_stride = fmt_blocksize * effective_width * block_size.height; } /* On v7+ row_stride and offset alignment requirement are equal */ if (arch >= 7) { row_stride = ALIGN_POT(row_stride, align_req); } if (explicit_layout && !afbc && !afrc) { /* Make sure the explicit stride is valid */ if (explicit_layout->row_stride < row_stride) { mesa_loge("panfrost: rejecting image due to invalid row stride.\n"); return false; } row_stride = explicit_layout->row_stride; } else if (linear) { /* Keep lines alignment on 64 byte for performance */ row_stride = ALIGN_POT(row_stride, 64); } unsigned slice_one_size = row_stride * (effective_height / block_size.height); /* Compute AFBC sizes if necessary */ if (afbc) { slice->row_stride = pan_afbc_row_stride(layout->modifier, effective_width); slice->afbc.stride = effective_width / block_size.width; slice->afbc.nr_blocks = slice->afbc.stride * (effective_height / block_size.height); slice->afbc.header_size = ALIGN_POT(slice->row_stride * (effective_height / align_h), pan_afbc_body_align(layout->modifier)); if (explicit_layout && explicit_layout->row_stride < slice->row_stride) { mesa_loge("panfrost: rejecting image due to invalid row stride.\n"); return false; } /* AFBC body size */ slice->afbc.body_size = slice_one_size; /* 3D AFBC resources have all headers placed at the * beginning instead of having them split per depth * level */ if (is_3d) { slice->afbc.surface_stride = slice->afbc.header_size; slice->afbc.header_size *= depth; slice->afbc.body_size *= depth; offset += slice->afbc.header_size; } else { slice_one_size += slice->afbc.header_size; slice->afbc.surface_stride = slice_one_size; } } else { slice->row_stride = row_stride; } unsigned slice_full_size = slice_one_size * depth * layout->nr_samples; slice->surface_stride = slice_one_size; /* Compute AFBC sizes if necessary */ offset += slice_full_size; slice->size = slice_full_size; /* Add a checksum region if necessary */ if (layout->crc) { slice->crc.size = panfrost_compute_checksum_size(slice, width, height); slice->crc.offset = offset; offset += slice->crc.size; slice->size += slice->crc.size; } width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } /* Arrays and cubemaps have the entire miptree duplicated */ layout->array_stride = ALIGN_POT(offset, 64); if (explicit_layout) layout->data_size = offset; else layout->data_size = ALIGN_POT( (uint64_t)layout->array_stride * (uint64_t)layout->array_size, 4096); return true; } void pan_iview_get_surface(const struct pan_image_view *iview, unsigned level, unsigned layer, unsigned sample, struct pan_surface *surf) { const struct pan_image *image = pan_image_view_get_plane(iview, 0); level += iview->first_level; assert(level < image->layout.nr_slices); layer += iview->first_layer; bool is_3d = image->layout.dim == MALI_TEXTURE_DIMENSION_3D; const struct pan_image_slice_layout *slice = &image->layout.slices[level]; mali_ptr base = image->data.base + image->data.offset; if (drm_is_afbc(image->layout.modifier)) { assert(!sample); if (is_3d) { ASSERTED unsigned depth = u_minify(image->layout.depth, level); assert(layer < depth); surf->afbc.header = base + slice->offset + (layer * slice->afbc.surface_stride); surf->afbc.body = base + slice->offset + slice->afbc.header_size + (slice->surface_stride * layer); } else { assert(layer < image->layout.array_size); surf->afbc.header = base + panfrost_texture_offset(&image->layout, level, layer, 0); surf->afbc.body = surf->afbc.header + slice->afbc.header_size; } } else { unsigned array_idx = is_3d ? 0 : layer; unsigned surface_idx = is_3d ? layer : sample; surf->data = base + panfrost_texture_offset(&image->layout, level, array_idx, surface_idx); } }