xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/agx_tilebuffer.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright 2022 Alyssa Rosenzweig
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  */
5*61046927SAndroid Build Coastguard Worker 
6*61046927SAndroid Build Coastguard Worker #include "agx_tilebuffer.h"
7*61046927SAndroid Build Coastguard Worker #include <assert.h>
8*61046927SAndroid Build Coastguard Worker #include "util/bitscan.h"
9*61046927SAndroid Build Coastguard Worker #include "util/format/u_format.h"
10*61046927SAndroid Build Coastguard Worker #include "agx_usc.h"
11*61046927SAndroid Build Coastguard Worker #include "layout.h"
12*61046927SAndroid Build Coastguard Worker 
/* Upper bound, in bytes, on the size of a single tile on G13G. Future
 * revisions of the architecture may use a different limit.
 */
#define MAX_BYTES_PER_TILE ((32 * 1024) - 1)

/* Upper bound, in bytes, on what one sample may occupy in the tilebuffer.
 * Render targets that would push an allocation past this are spilled to
 * memory instead.
 */
#define MAX_BYTES_PER_SAMPLE 64

/* Architectural minimum tile size, in pixels. */
#define MIN_TILE_SIZE_PX (16 * 16)
25*61046927SAndroid Build Coastguard Worker 
26*61046927SAndroid Build Coastguard Worker /* Select the largest tile size that fits */
27*61046927SAndroid Build Coastguard Worker static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)28*61046927SAndroid Build Coastguard Worker agx_select_tile_size(unsigned bytes_per_pixel)
29*61046927SAndroid Build Coastguard Worker {
30*61046927SAndroid Build Coastguard Worker    /* clang-format off */
31*61046927SAndroid Build Coastguard Worker    struct agx_tile_size sizes[] = {
32*61046927SAndroid Build Coastguard Worker       { 32, 32 },
33*61046927SAndroid Build Coastguard Worker       { 32, 16 },
34*61046927SAndroid Build Coastguard Worker       { 16, 16 }
35*61046927SAndroid Build Coastguard Worker    };
36*61046927SAndroid Build Coastguard Worker    /* clang-format on */
37*61046927SAndroid Build Coastguard Worker 
38*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
39*61046927SAndroid Build Coastguard Worker       struct agx_tile_size size = sizes[i];
40*61046927SAndroid Build Coastguard Worker 
41*61046927SAndroid Build Coastguard Worker       if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
42*61046927SAndroid Build Coastguard Worker          return size;
43*61046927SAndroid Build Coastguard Worker    }
44*61046927SAndroid Build Coastguard Worker 
45*61046927SAndroid Build Coastguard Worker    unreachable("No supported tile size meets the bytes per pixel requirement");
46*61046927SAndroid Build Coastguard Worker }
47*61046927SAndroid Build Coastguard Worker 
48*61046927SAndroid Build Coastguard Worker static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)49*61046927SAndroid Build Coastguard Worker agx_shared_layout_from_tile_size(struct agx_tile_size t)
50*61046927SAndroid Build Coastguard Worker {
51*61046927SAndroid Build Coastguard Worker    if (t.width == 32 && t.height == 32)
52*61046927SAndroid Build Coastguard Worker       return AGX_SHARED_LAYOUT_32X32;
53*61046927SAndroid Build Coastguard Worker    else if (t.width == 32 && t.height == 16)
54*61046927SAndroid Build Coastguard Worker       return AGX_SHARED_LAYOUT_32X16;
55*61046927SAndroid Build Coastguard Worker    else if (t.width == 16 && t.height == 16)
56*61046927SAndroid Build Coastguard Worker       return AGX_SHARED_LAYOUT_16X16;
57*61046927SAndroid Build Coastguard Worker    else
58*61046927SAndroid Build Coastguard Worker       unreachable("Invalid tile size");
59*61046927SAndroid Build Coastguard Worker }
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(const enum pipe_format * formats,uint8_t nr_cbufs,uint8_t nr_samples,bool layered)62*61046927SAndroid Build Coastguard Worker agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
63*61046927SAndroid Build Coastguard Worker                             uint8_t nr_samples, bool layered)
64*61046927SAndroid Build Coastguard Worker {
65*61046927SAndroid Build Coastguard Worker    struct agx_tilebuffer_layout tib = {
66*61046927SAndroid Build Coastguard Worker       .nr_samples = nr_samples,
67*61046927SAndroid Build Coastguard Worker       .layered = layered,
68*61046927SAndroid Build Coastguard Worker    };
69*61046927SAndroid Build Coastguard Worker 
70*61046927SAndroid Build Coastguard Worker    uint32_t offset_B = 0;
71*61046927SAndroid Build Coastguard Worker 
72*61046927SAndroid Build Coastguard Worker    for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
73*61046927SAndroid Build Coastguard Worker       tib.logical_format[rt] = formats[rt];
74*61046927SAndroid Build Coastguard Worker 
75*61046927SAndroid Build Coastguard Worker       /* If there are gaps in the layout, don't allocate holes. Obscure,
76*61046927SAndroid Build Coastguard Worker        * PIPE_FORMAT_NONE has a size of 1, not 0.
77*61046927SAndroid Build Coastguard Worker        */
78*61046927SAndroid Build Coastguard Worker       if (formats[rt] == PIPE_FORMAT_NONE)
79*61046927SAndroid Build Coastguard Worker          continue;
80*61046927SAndroid Build Coastguard Worker 
81*61046927SAndroid Build Coastguard Worker       /* Require natural alignment for tilebuffer allocations. This could be
82*61046927SAndroid Build Coastguard Worker        * optimized, but this shouldn't be a problem in practice.
83*61046927SAndroid Build Coastguard Worker        */
84*61046927SAndroid Build Coastguard Worker       enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
85*61046927SAndroid Build Coastguard Worker       unsigned align_B = util_format_get_blocksize(physical_fmt);
86*61046927SAndroid Build Coastguard Worker       assert(util_is_power_of_two_nonzero(align_B) &&
87*61046927SAndroid Build Coastguard Worker              util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
88*61046927SAndroid Build Coastguard Worker              align_B < MAX_BYTES_PER_SAMPLE &&
89*61046927SAndroid Build Coastguard Worker              "max bytes per sample divisible by alignment");
90*61046927SAndroid Build Coastguard Worker 
91*61046927SAndroid Build Coastguard Worker       offset_B = ALIGN_POT(offset_B, align_B);
92*61046927SAndroid Build Coastguard Worker       assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");
93*61046927SAndroid Build Coastguard Worker 
94*61046927SAndroid Build Coastguard Worker       /* Determine the size, if we were to allocate this render target to the
95*61046927SAndroid Build Coastguard Worker        * tilebuffer as desired.
96*61046927SAndroid Build Coastguard Worker        */
97*61046927SAndroid Build Coastguard Worker       unsigned nr = util_format_get_nr_components(physical_fmt) == 1
98*61046927SAndroid Build Coastguard Worker                        ? util_format_get_nr_components(formats[rt])
99*61046927SAndroid Build Coastguard Worker                        : 1;
100*61046927SAndroid Build Coastguard Worker 
101*61046927SAndroid Build Coastguard Worker       unsigned size_B = align_B * nr;
102*61046927SAndroid Build Coastguard Worker       unsigned new_offset_B = offset_B + size_B;
103*61046927SAndroid Build Coastguard Worker 
104*61046927SAndroid Build Coastguard Worker       /* If allocating this render target would exceed any tilebuffer limits, we
105*61046927SAndroid Build Coastguard Worker        * need to spill it to memory. We continue processing in case there are
106*61046927SAndroid Build Coastguard Worker        * smaller render targets after that would still fit. Otherwise, we
107*61046927SAndroid Build Coastguard Worker        * allocate it to the tilebuffer.
108*61046927SAndroid Build Coastguard Worker        *
109*61046927SAndroid Build Coastguard Worker        * TODO: Suboptimal, we might be able to reorder render targets to
110*61046927SAndroid Build Coastguard Worker        * avoid fragmentation causing spilling.
111*61046927SAndroid Build Coastguard Worker        */
112*61046927SAndroid Build Coastguard Worker       bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
113*61046927SAndroid Build Coastguard Worker                   (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
114*61046927SAndroid Build Coastguard Worker                    nr_samples) <= MAX_BYTES_PER_TILE;
115*61046927SAndroid Build Coastguard Worker 
116*61046927SAndroid Build Coastguard Worker       if (fits) {
117*61046927SAndroid Build Coastguard Worker          tib._offset_B[rt] = offset_B;
118*61046927SAndroid Build Coastguard Worker          offset_B = new_offset_B;
119*61046927SAndroid Build Coastguard Worker       } else {
120*61046927SAndroid Build Coastguard Worker          tib.spilled[rt] = true;
121*61046927SAndroid Build Coastguard Worker       }
122*61046927SAndroid Build Coastguard Worker    }
123*61046927SAndroid Build Coastguard Worker 
124*61046927SAndroid Build Coastguard Worker    assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");
125*61046927SAndroid Build Coastguard Worker 
126*61046927SAndroid Build Coastguard Worker    /* Multisampling needs a nonempty allocation.
127*61046927SAndroid Build Coastguard Worker     * XXX: Check this against hw
128*61046927SAndroid Build Coastguard Worker     */
129*61046927SAndroid Build Coastguard Worker    if (nr_samples > 1)
130*61046927SAndroid Build Coastguard Worker       offset_B = MAX2(offset_B, 1);
131*61046927SAndroid Build Coastguard Worker 
132*61046927SAndroid Build Coastguard Worker    tib.sample_size_B = ALIGN_POT(offset_B, 8);
133*61046927SAndroid Build Coastguard Worker 
134*61046927SAndroid Build Coastguard Worker    tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);
135*61046927SAndroid Build Coastguard Worker 
136*61046927SAndroid Build Coastguard Worker    agx_tilebuffer_pack_usc(&tib);
137*61046927SAndroid Build Coastguard Worker    return tib;
138*61046927SAndroid Build Coastguard Worker }
139*61046927SAndroid Build Coastguard Worker 
140*61046927SAndroid Build Coastguard Worker enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout * tib,unsigned rt)141*61046927SAndroid Build Coastguard Worker agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
142*61046927SAndroid Build Coastguard Worker {
143*61046927SAndroid Build Coastguard Worker    return ail_pixel_format[tib->logical_format[rt]].renderable;
144*61046927SAndroid Build Coastguard Worker }
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout * tib,unsigned rt)147*61046927SAndroid Build Coastguard Worker agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
148*61046927SAndroid Build Coastguard Worker {
149*61046927SAndroid Build Coastguard Worker    /* We don't bother support masking with spilled render targets. This might be
150*61046927SAndroid Build Coastguard Worker     * optimized in the future but spilling is so rare anyway it's not worth it.
151*61046927SAndroid Build Coastguard Worker     */
152*61046927SAndroid Build Coastguard Worker    if (tib->spilled[rt])
153*61046927SAndroid Build Coastguard Worker       return false;
154*61046927SAndroid Build Coastguard Worker 
155*61046927SAndroid Build Coastguard Worker    enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
156*61046927SAndroid Build Coastguard Worker    return ail_isa_format_supports_mask((enum ail_isa_format)fmt);
157*61046927SAndroid Build Coastguard Worker }
158*61046927SAndroid Build Coastguard Worker 
159*61046927SAndroid Build Coastguard Worker uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout * tib)160*61046927SAndroid Build Coastguard Worker agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
161*61046927SAndroid Build Coastguard Worker {
162*61046927SAndroid Build Coastguard Worker    return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
163*61046927SAndroid Build Coastguard Worker           tib->tile_size.height;
164*61046927SAndroid Build Coastguard Worker }
165*61046927SAndroid Build Coastguard Worker 
166*61046927SAndroid Build Coastguard Worker void
agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout * tib)167*61046927SAndroid Build Coastguard Worker agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout *tib)
168*61046927SAndroid Build Coastguard Worker {
169*61046927SAndroid Build Coastguard Worker    agx_pack(&tib->usc, USC_SHARED, cfg) {
170*61046927SAndroid Build Coastguard Worker       if (tib->nr_samples > 0) {
171*61046927SAndroid Build Coastguard Worker          cfg.uses_shared_memory = true;
172*61046927SAndroid Build Coastguard Worker          cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
173*61046927SAndroid Build Coastguard Worker          cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
174*61046927SAndroid Build Coastguard Worker          cfg.sample_count = tib->nr_samples;
175*61046927SAndroid Build Coastguard Worker          cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
176*61046927SAndroid Build Coastguard Worker       } else {
177*61046927SAndroid Build Coastguard Worker          cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE;
178*61046927SAndroid Build Coastguard Worker          cfg.bytes_per_threadgroup = 65536;
179*61046927SAndroid Build Coastguard Worker       }
180*61046927SAndroid Build Coastguard Worker    }
181*61046927SAndroid Build Coastguard Worker }
182