/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_tilebuffer.h"
#include <assert.h>
#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "agx_usc.h"
#include "layout.h"

/* Maximum number of bytes per tile on G13G. This may change in future
 * versions of the architecture.
 */
#define MAX_BYTES_PER_TILE (32768 - 1)

/* Maximum bytes per sample in the tilebuffer. Greater allocations require
 * spilling render targets to memory.
 */
#define MAX_BYTES_PER_SAMPLE (64)

/* Minimum tile size in pixels, architectural. */
#define MIN_TILE_SIZE_PX (16 * 16)

/* Select the largest tile size that fits */
static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)
{
   /* clang-format off */
   struct agx_tile_size sizes[] = {
      { 32, 32 },
      { 32, 16 },
      { 16, 16 }
   };
   /* clang-format on */

   for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
      struct agx_tile_size size = sizes[i];

      if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
         return size;
   }

   unreachable("No supported tile size meets the bytes per pixel requirement");
}

static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)
{
   if (t.width == 32 && t.height == 32)
      return AGX_SHARED_LAYOUT_32X32;
   else if (t.width == 32 && t.height == 16)
      return AGX_SHARED_LAYOUT_32X16;
   else if (t.width == 16 && t.height == 16)
      return AGX_SHARED_LAYOUT_16X16;
   else
      unreachable("Invalid tile size");
}

struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
                            uint8_t nr_samples, bool layered)
{
   struct agx_tilebuffer_layout tib = {
      .nr_samples = nr_samples,
      .layered = layered,
   };

   uint32_t offset_B = 0;

   for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
      tib.logical_format[rt] = formats[rt];

      /* If there are gaps in the layout, don't allocate holes. Obscurely,
       * PIPE_FORMAT_NONE has a size of 1, not 0.
       */
      if (formats[rt] == PIPE_FORMAT_NONE)
         continue;

      /* Require natural alignment for tilebuffer allocations. This could be
       * optimized, but it shouldn't be a problem in practice.
       */
      enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
      unsigned align_B = util_format_get_blocksize(physical_fmt);
      assert(util_is_power_of_two_nonzero(align_B) &&
             util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
             align_B < MAX_BYTES_PER_SAMPLE &&
             "max bytes per sample divisible by alignment");

      offset_B = ALIGN_POT(offset_B, align_B);
      assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");

      /* Determine the size, if we were to allocate this render target to the
       * tilebuffer as desired.
       */
      unsigned nr = util_format_get_nr_components(physical_fmt) == 1
                       ? util_format_get_nr_components(formats[rt])
                       : 1;

      unsigned size_B = align_B * nr;
      unsigned new_offset_B = offset_B + size_B;

      /* If allocating this render target would exceed any tilebuffer limits,
       * we need to spill it to memory. We continue processing in case smaller
       * render targets follow that would still fit. Otherwise, we allocate it
       * to the tilebuffer.
       *
       * TODO: Suboptimal, we might be able to reorder render targets to
       * avoid fragmentation causing spilling.
       */
      bool fits =
         (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
         (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX * nr_samples) <=
            MAX_BYTES_PER_TILE;

      if (fits) {
         tib._offset_B[rt] = offset_B;
         offset_B = new_offset_B;
      } else {
         tib.spilled[rt] = true;
      }
   }

   assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");

   /* Multisampling needs a nonempty allocation.
    * XXX: Check this against hw
    */
   if (nr_samples > 1)
      offset_B = MAX2(offset_B, 1);

   tib.sample_size_B = ALIGN_POT(offset_B, 8);

   tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);

   agx_tilebuffer_pack_usc(&tib);
   return tib;
}
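/* Worked example (illustrative, not from the original source): consider two
 * render targets at 4x MSAA, assuming both formats are natively renderable so
 * the physical tilebuffer format matches the logical one:
 *
 *    enum pipe_format formats[2] = {
 *       PIPE_FORMAT_R8G8B8A8_UNORM,     // 4 bytes per sample
 *       PIPE_FORMAT_R16G16B16A16_FLOAT, // 8 bytes per sample
 *    };
 *
 *    struct agx_tilebuffer_layout tib =
 *       agx_build_tilebuffer_layout(formats, 2, 4, false);
 *
 * RT0 occupies bytes [0, 4). RT1 is aligned up to offset 8 and ends at 16, so
 * sample_size_B = ALIGN_POT(16, 8) = 16. That is 16 * 4 = 64 bytes per pixel:
 * 64 * 32 * 16 = 32768 exceeds MAX_BYTES_PER_TILE (32767), so only the
 * minimum 16x16 tile fits, giving a per-tile footprint of 64 * 256 = 16384
 * bytes.
 */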
enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   return ail_pixel_format[tib->logical_format[rt]].renderable;
}

bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   /* We don't bother supporting masking with spilled render targets. This
    * might be optimized in the future, but spilling is so rare anyway that
    * it's not worth it.
    */
   if (tib->spilled[rt])
      return false;

   enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
   return ail_isa_format_supports_mask((enum ail_isa_format)fmt);
}

uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
{
   return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
          tib->tile_size.height;
}

void
agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout *tib)
{
   agx_pack(&tib->usc, USC_SHARED, cfg) {
      if (tib->nr_samples > 0) {
         cfg.uses_shared_memory = true;
         cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
         cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
         cfg.sample_count = tib->nr_samples;
         cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
      } else {
         cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE;
         cfg.bytes_per_threadgroup = 65536;
      }
   }
}
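/* Usage sketch (illustrative, not part of the original file): exercises the
 * helpers above on the worked example's layout. The format choices and the
 * asserted values are assumptions that hold only if those formats' physical
 * tilebuffer formats match their logical ones. Guarded out so the sketch does
 * not affect the build.
 */
#if 0
static void
agx_tilebuffer_example(void)
{
   enum pipe_format formats[2] = {
      PIPE_FORMAT_R8G8B8A8_UNORM,     /* 4 bytes per sample */
      PIPE_FORMAT_R16G16B16A16_FLOAT, /* 8 bytes per sample */
   };

   struct agx_tilebuffer_layout tib =
      agx_build_tilebuffer_layout(formats, 2, 4, false);

   /* 4 bytes + 8 bytes (aligned to 8) allocate 16 bytes per sample */
   assert(tib.sample_size_B == 16);

   /* 64 bytes per pixel forces the minimum 16x16 tile */
   assert(tib.tile_size.width == 16 && tib.tile_size.height == 16);

   /* 16 B/sample * 4 samples * 256 px = 16384 bytes per tile */
   assert(agx_tilebuffer_total_size(&tib) == 16384);
}
#endif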