xref: /aosp_15_r20/external/mesa3d/src/panfrost/lib/pan_tiler.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright (C) 2019 Collabora, Ltd.
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker  *
11*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker  * Software.
14*61046927SAndroid Build Coastguard Worker  *
15*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20*61046927SAndroid Build Coastguard Worker  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21*61046927SAndroid Build Coastguard Worker  * SOFTWARE.
22*61046927SAndroid Build Coastguard Worker  *
23*61046927SAndroid Build Coastguard Worker  * Authors:
24*61046927SAndroid Build Coastguard Worker  *   Alyssa Rosenzweig <[email protected]>
25*61046927SAndroid Build Coastguard Worker  */
26*61046927SAndroid Build Coastguard Worker 
27*61046927SAndroid Build Coastguard Worker #include "util/macros.h"
28*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
29*61046927SAndroid Build Coastguard Worker #include "pan_encoder.h"
30*61046927SAndroid Build Coastguard Worker 
31*61046927SAndroid Build Coastguard Worker /* Mali GPUs are tiled-mode renderers, rather than immediate-mode.
32*61046927SAndroid Build Coastguard Worker  * Conceptually, the screen is divided into 16x16 tiles. Vertex shaders run.
33*61046927SAndroid Build Coastguard Worker  * Then, a fixed-function hardware block (the tiler) consumes the gl_Position
34*61046927SAndroid Build Coastguard Worker  * results. For each triangle specified, it marks each containing tile as
35*61046927SAndroid Build Coastguard Worker  * containing that triangle. This set of "triangles per tile" form the "polygon
36*61046927SAndroid Build Coastguard Worker  * list". Finally, the rasterization unit consumes the polygon list to invoke
37*61046927SAndroid Build Coastguard Worker  * the fragment shader.
38*61046927SAndroid Build Coastguard Worker  *
39*61046927SAndroid Build Coastguard Worker  * In practice, it's a bit more complicated than this. On Midgard chips with an
40*61046927SAndroid Build Coastguard Worker  * "advanced tiling unit" (all except T720/T820/T830), 16x16 is the logical
41*61046927SAndroid Build Coastguard Worker  * tile size, but Midgard features "hierarchical tiling", where power-of-two
42*61046927SAndroid Build Coastguard Worker  * multiples of the base tile size can be used: hierarchy level 0 (16x16),
43*61046927SAndroid Build Coastguard Worker  * level 1 (32x32), level 2 (64x64), per public information about Midgard's
44*61046927SAndroid Build Coastguard Worker  * tiling. In fact, tiling goes up to 4096x4096 (!), although in practice
45*61046927SAndroid Build Coastguard Worker  * 128x128 is the largest usually used (though higher modes are enabled).  The
46*61046927SAndroid Build Coastguard Worker  * idea behind hierarchical tiling is to use low tiling levels for small
47*61046927SAndroid Build Coastguard Worker  * triangles and high levels for large triangles, to minimize memory bandwidth
48*61046927SAndroid Build Coastguard Worker  * and repeated fragment shader invocations (the former issue inherent to
49*61046927SAndroid Build Coastguard Worker  * immediate-mode rendering and the latter common in traditional tilers).
50*61046927SAndroid Build Coastguard Worker  *
51*61046927SAndroid Build Coastguard Worker  * The tiler itself works by reading varyings in and writing a polygon list
52*61046927SAndroid Build Coastguard Worker  * out. Unfortunately (for us), both of these buffers are managed in main
 * memory; although they ideally will be cached, it is the driver's
54*61046927SAndroid Build Coastguard Worker  * responsibility to allocate these buffers. Varying buffer allocation is
55*61046927SAndroid Build Coastguard Worker  * handled elsewhere, as it is not tiler specific; the real issue is allocating
56*61046927SAndroid Build Coastguard Worker  * the polygon list.
57*61046927SAndroid Build Coastguard Worker  *
58*61046927SAndroid Build Coastguard Worker  * This is hard, because from the driver's perspective, we have no information
59*61046927SAndroid Build Coastguard Worker  * about what geometry will actually look like on screen; that information is
60*61046927SAndroid Build Coastguard Worker  * only gained from running the vertex shader. (Theoretically, we could run the
61*61046927SAndroid Build Coastguard Worker  * vertex shaders in software as a prepass, or in hardware with transform
62*61046927SAndroid Build Coastguard Worker  * feedback as a prepass, but either idea is ludicrous on so many levels).
63*61046927SAndroid Build Coastguard Worker  *
64*61046927SAndroid Build Coastguard Worker  * Instead, Mali uses a bit of a hybrid approach, splitting the polygon list
65*61046927SAndroid Build Coastguard Worker  * into three distinct pieces. First, the driver statically determines which
66*61046927SAndroid Build Coastguard Worker  * tile hierarchy levels to use (more on that later). At this point, we know the
67*61046927SAndroid Build Coastguard Worker  * framebuffer dimensions and all the possible tilings of the framebuffer, so
68*61046927SAndroid Build Coastguard Worker  * we know exactly how many tiles exist across all hierarchy levels. The first
69*61046927SAndroid Build Coastguard Worker  * piece of the polygon list is the header, which is exactly 8 bytes per tile,
70*61046927SAndroid Build Coastguard Worker  * plus padding and a small 64-byte prologue. (If that doesn't remind you of
71*61046927SAndroid Build Coastguard Worker  * AFBC, it should. See pan_afbc.c for some fun parallels). The next part is
72*61046927SAndroid Build Coastguard Worker  * the polygon list body, which seems to contain 512 bytes per tile, again
73*61046927SAndroid Build Coastguard Worker  * across every level of the hierarchy. These two parts form the polygon list
74*61046927SAndroid Build Coastguard Worker  * buffer. This buffer has a statically determinable size, approximately equal
75*61046927SAndroid Build Coastguard Worker  * to the # of tiles across all hierarchy levels * (8 bytes + 512 bytes), plus
76*61046927SAndroid Build Coastguard Worker  * alignment / minimum restrictions / etc.
77*61046927SAndroid Build Coastguard Worker  *
78*61046927SAndroid Build Coastguard Worker  * The third piece is the easy one (for us): the tiler heap. In essence, the
79*61046927SAndroid Build Coastguard Worker  * tiler heap is a gigantic slab that's as big as could possibly be necessary
80*61046927SAndroid Build Coastguard Worker  * in the worst case imaginable. Just... a gigantic allocation that we give a
81*61046927SAndroid Build Coastguard Worker  * start and end pointer to. What's the catch? The tiler heap is lazily
82*61046927SAndroid Build Coastguard Worker  * allocated; that is, a huge amount of memory is _reserved_, but only a tiny
83*61046927SAndroid Build Coastguard Worker  * bit is actually allocated upfront. The GPU just keeps using the
84*61046927SAndroid Build Coastguard Worker  * unallocated-but-reserved portions as it goes along, generating page faults
85*61046927SAndroid Build Coastguard Worker  * if it goes beyond the allocation, and then the kernel is instructed to
86*61046927SAndroid Build Coastguard Worker  * expand the allocation on page fault (known in the vendor kernel as growable
87*61046927SAndroid Build Coastguard Worker  * memory). This is quite a bit of bookkeeping of its own, but that task is
88*61046927SAndroid Build Coastguard Worker  * pushed to kernel space and we can mostly ignore it here, just remembering to
89*61046927SAndroid Build Coastguard Worker  * set the GROWABLE flag so the kernel actually uses this path rather than
90*61046927SAndroid Build Coastguard Worker  * allocating a gigantic amount up front and burning a hole in RAM.
91*61046927SAndroid Build Coastguard Worker  *
92*61046927SAndroid Build Coastguard Worker  * As far as determining which hierarchy levels to use, the simple answer is
93*61046927SAndroid Build Coastguard Worker  * that right now, we don't. In the tiler configuration fields (consistent from
94*61046927SAndroid Build Coastguard Worker  * the earliest Midgard's SFBD through the latest Bifrost traces we have),
95*61046927SAndroid Build Coastguard Worker  * there is a hierarchy_mask field, controlling which levels (tile sizes) are
96*61046927SAndroid Build Coastguard Worker  * enabled. Ideally, the hierarchical tiling dream -- mapping big polygons to
97*61046927SAndroid Build Coastguard Worker  * big tiles and small polygons to small tiles -- would be realized here as
98*61046927SAndroid Build Coastguard Worker  * well. As long as there are polygons at all needing tiling, we always have to
99*61046927SAndroid Build Coastguard Worker  * have big tiles available, in case there are big polygons. But we don't
100*61046927SAndroid Build Coastguard Worker  * necessarily need small tiles available. Ideally, when there are small
101*61046927SAndroid Build Coastguard Worker  * polygons, small tiles are enabled (to avoid waste from putting small
102*61046927SAndroid Build Coastguard Worker  * triangles in the big tiles); when there are not, small tiles are disabled to
103*61046927SAndroid Build Coastguard Worker  * avoid enabling more levels than necessary, which potentially costs in memory
104*61046927SAndroid Build Coastguard Worker  * bandwidth / power / tiler performance.
105*61046927SAndroid Build Coastguard Worker  *
106*61046927SAndroid Build Coastguard Worker  * Of course, the driver has to figure this out statically. When tile
 * hierarchies are actually established, this occurs by the tiler in
108*61046927SAndroid Build Coastguard Worker  * fixed-function hardware, after the vertex shaders have run and there is
109*61046927SAndroid Build Coastguard Worker  * sufficient information to figure out the size of triangles. The driver has
110*61046927SAndroid Build Coastguard Worker  * no such luxury, again barring insane hacks like additionally running the
111*61046927SAndroid Build Coastguard Worker  * vertex shaders in software or in hardware via transform feedback. Thus, for
112*61046927SAndroid Build Coastguard Worker  * the driver, we need a heuristic approach.
113*61046927SAndroid Build Coastguard Worker  *
114*61046927SAndroid Build Coastguard Worker  * There are lots of heuristics to guess triangle size statically you could
115*61046927SAndroid Build Coastguard Worker  * imagine, but one approach shines as particularly simple-stupid: assume all
116*61046927SAndroid Build Coastguard Worker  * on-screen triangles are equal size and spread equidistantly throughout the
117*61046927SAndroid Build Coastguard Worker  * screen. Let's be clear, this is NOT A VALID ASSUMPTION. But if we roll with
118*61046927SAndroid Build Coastguard Worker  * it, then we see:
119*61046927SAndroid Build Coastguard Worker  *
120*61046927SAndroid Build Coastguard Worker  *      Triangle Area   = (Screen Area / # of triangles)
121*61046927SAndroid Build Coastguard Worker  *                      = (Width * Height) / (# of triangles)
122*61046927SAndroid Build Coastguard Worker  *
123*61046927SAndroid Build Coastguard Worker  * Or if you prefer, we can also make a third CRAZY assumption that we only draw
124*61046927SAndroid Build Coastguard Worker  * right triangles with edges parallel/perpendicular to the sides of the screen
125*61046927SAndroid Build Coastguard Worker  * with no overdraw, forming a triangle grid across the screen:
126*61046927SAndroid Build Coastguard Worker  *
127*61046927SAndroid Build Coastguard Worker  * |--w--|
128*61046927SAndroid Build Coastguard Worker  *  _____   |
129*61046927SAndroid Build Coastguard Worker  * | /| /|  |
130*61046927SAndroid Build Coastguard Worker  * |/_|/_|  h
131*61046927SAndroid Build Coastguard Worker  * | /| /|  |
132*61046927SAndroid Build Coastguard Worker  * |/_|/_|  |
133*61046927SAndroid Build Coastguard Worker  *
134*61046927SAndroid Build Coastguard Worker  * Then you can use some middle school geometry and algebra to work out the
 * triangle dimensions. I started working on this, but realised I didn't need
 * to make my point, but couldn't bear to erase that ASCII art. Anyway.
137*61046927SAndroid Build Coastguard Worker  *
138*61046927SAndroid Build Coastguard Worker  * POINT IS, by considering the ratio of screen area and triangle count, we can
139*61046927SAndroid Build Coastguard Worker  * estimate the triangle size. For a small size, use small bins; for a large
140*61046927SAndroid Build Coastguard Worker  * size, use large bins. Intuitively, this metric makes sense: when there are
141*61046927SAndroid Build Coastguard Worker  * few triangles on a large screen, you're probably compositing a UI and
142*61046927SAndroid Build Coastguard Worker  * therefore the triangles are large; when there are a lot of triangles on a
143*61046927SAndroid Build Coastguard Worker  * small screen, you're probably rendering a 3D mesh and therefore the
144*61046927SAndroid Build Coastguard Worker  * triangles are tiny. (Or better said -- there will be tiny triangles, even if
145*61046927SAndroid Build Coastguard Worker  * there are also large triangles. There have to be unless you expect crazy
146*61046927SAndroid Build Coastguard Worker  * overdraw. Generally, it's better to allow more small bin sizes than
147*61046927SAndroid Build Coastguard Worker  * necessary than not allow enough.)
148*61046927SAndroid Build Coastguard Worker  *
149*61046927SAndroid Build Coastguard Worker  * From this heuristic (or whatever), we determine the minimum allowable tile
150*61046927SAndroid Build Coastguard Worker  * size, and we use that to decide the hierarchy masking, selecting from the
151*61046927SAndroid Build Coastguard Worker  * minimum "ideal" tile size to the maximum tile size (2048x2048 in practice).
152*61046927SAndroid Build Coastguard Worker  *
153*61046927SAndroid Build Coastguard Worker  * Once we have that mask and the framebuffer dimensions, we can compute the
154*61046927SAndroid Build Coastguard Worker  * size of the statically-sized polygon list structures, allocate them, and go!
155*61046927SAndroid Build Coastguard Worker  *
156*61046927SAndroid Build Coastguard Worker  * -----
157*61046927SAndroid Build Coastguard Worker  *
158*61046927SAndroid Build Coastguard Worker  * On T720, T820, and T830, there is no support for hierarchical tiling.
159*61046927SAndroid Build Coastguard Worker  * Instead, the hardware allows the driver to select the tile size dynamically
160*61046927SAndroid Build Coastguard Worker  * on a per-framebuffer basis, including allowing rectangular/non-square tiles.
161*61046927SAndroid Build Coastguard Worker  * Rules for tile size selection are as follows:
162*61046927SAndroid Build Coastguard Worker  *
163*61046927SAndroid Build Coastguard Worker  *  - Dimensions must be powers-of-two.
164*61046927SAndroid Build Coastguard Worker  *  - The smallest tile is 16x16.
165*61046927SAndroid Build Coastguard Worker  *  - The tile width/height is at most the framebuffer w/h (clamp up to 16 pix)
166*61046927SAndroid Build Coastguard Worker  *  - There must be no more than 64 tiles in either dimension.
167*61046927SAndroid Build Coastguard Worker  *
168*61046927SAndroid Build Coastguard Worker  * Within these constraints, the driver is free to pick a tile size according
169*61046927SAndroid Build Coastguard Worker  * to some heuristic, similar to units with an advanced tiling unit.
170*61046927SAndroid Build Coastguard Worker  *
171*61046927SAndroid Build Coastguard Worker  * To pick a size without any heuristics, we may satisfy the constraints by
172*61046927SAndroid Build Coastguard Worker  * defaulting to 16x16 (a power-of-two). This fits the minimum. For the size
173*61046927SAndroid Build Coastguard Worker  * constraint, consider:
174*61046927SAndroid Build Coastguard Worker  *
 *      # of tiles < 64
 *      ceil (fb / tile) < 64
 *      (fb / tile) <= (64 - 1)
 *      tile >= fb / (64 - 1)
 *
 * Since next_power_of_two(fb / (64 - 1)) >= fb / (64 - 1), a tile of that
 * size satisfies the bound. Hence we clamp up to align_pot(fb / (64 - 1)).
 *
 * Extending to use a selection heuristic left for future work.
183*61046927SAndroid Build Coastguard Worker  *
184*61046927SAndroid Build Coastguard Worker  * Once the tile size (w, h) is chosen, we compute the hierarchy "mask":
185*61046927SAndroid Build Coastguard Worker  *
186*61046927SAndroid Build Coastguard Worker  *      hierarchy_mask = (log2(h / 16) << 6) | log2(w / 16)
187*61046927SAndroid Build Coastguard Worker  *
188*61046927SAndroid Build Coastguard Worker  * Of course with no hierarchical tiling, this is not a mask; it's just a field
189*61046927SAndroid Build Coastguard Worker  * specifying the tile size. But I digress.
190*61046927SAndroid Build Coastguard Worker  *
 * We also compute the polygon list sizes (with framebuffer size W, H) as:
192*61046927SAndroid Build Coastguard Worker  *
193*61046927SAndroid Build Coastguard Worker  *      full_size = 0x200 + 0x200 * ceil(W / w) * ceil(H / h)
194*61046927SAndroid Build Coastguard Worker  *      offset = 8 * ceil(W / w) * ceil(H / h)
195*61046927SAndroid Build Coastguard Worker  *
196*61046927SAndroid Build Coastguard Worker  * It further appears necessary to round down offset to the nearest 0x200.
197*61046927SAndroid Build Coastguard Worker  * Possibly we would also round down full_size to the nearest 0x200 but
198*61046927SAndroid Build Coastguard Worker  * full_size/0x200 = (1 + ceil(W / w) * ceil(H / h)) is an integer so there's
199*61046927SAndroid Build Coastguard Worker  * nothing to do.
200*61046927SAndroid Build Coastguard Worker  */
201*61046927SAndroid Build Coastguard Worker 
/* Hierarchical tiling spans from 16x16 to 4096x4096 tiles. The bounds are
 * written as shifts first so they are plain integer constants (usable
 * without a function call and safe to evaluate repeatedly inside macros such
 * as CLAMP); the pixel sizes are derived from them so the two cannot drift
 * apart.
 */

#define MIN_TILE_SHIFT 4u  /* log2(16) */
#define MAX_TILE_SHIFT 12u /* log2(4096) */

#define MIN_TILE_SIZE (1 << MIN_TILE_SHIFT)
#define MAX_TILE_SIZE (1 << MAX_TILE_SHIFT)

/* The hierarchy has a 64-byte prologue */
#define PROLOGUE_SIZE 0x40

/* For each tile (across all hierarchy levels), there is 8 bytes of header */
#define HEADER_BYTES_PER_TILE 0x8

/* Likewise, each tile per level has 512 bytes of body */
#define FULL_BYTES_PER_TILE 0x200
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker static unsigned
panfrost_hierarchy_size(unsigned width,unsigned height,unsigned mask,unsigned bytes_per_tile)222*61046927SAndroid Build Coastguard Worker panfrost_hierarchy_size(unsigned width, unsigned height, unsigned mask,
223*61046927SAndroid Build Coastguard Worker                         unsigned bytes_per_tile)
224*61046927SAndroid Build Coastguard Worker {
225*61046927SAndroid Build Coastguard Worker    unsigned size = PROLOGUE_SIZE;
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker    /* Iterate hierarchy levels */
228*61046927SAndroid Build Coastguard Worker    u_foreach_bit(level, mask) {
229*61046927SAndroid Build Coastguard Worker       assert(level <= (MAX_TILE_SHIFT - MIN_TILE_SHIFT) &&
230*61046927SAndroid Build Coastguard Worker              "invalid hierarchy mask");
231*61046927SAndroid Build Coastguard Worker 
232*61046927SAndroid Build Coastguard Worker       /* Levels are power-of-two sizes */
233*61046927SAndroid Build Coastguard Worker       unsigned tile_size = MIN_TILE_SIZE << level;
234*61046927SAndroid Build Coastguard Worker 
235*61046927SAndroid Build Coastguard Worker       size += DIV_ROUND_UP(width, tile_size) * DIV_ROUND_UP(height, tile_size) *
236*61046927SAndroid Build Coastguard Worker               bytes_per_tile;
237*61046927SAndroid Build Coastguard Worker    }
238*61046927SAndroid Build Coastguard Worker 
239*61046927SAndroid Build Coastguard Worker    /* This size will be used as an offset, so ensure it's aligned */
240*61046927SAndroid Build Coastguard Worker    return ALIGN_POT(size, 0x200);
241*61046927SAndroid Build Coastguard Worker }
242*61046927SAndroid Build Coastguard Worker 
/* Size one polygon list structure on GPUs without hierarchical tiling, i.e.
 * a single "level" whose tile dimensions are packed into dim:
 *
 *      0x200 + round_down(bytes_per_tile * ceil(W / w) * ceil(H / h), 0x200)
 *
 *   width, height   framebuffer dimensions (W, H) in pixels
 *   dim             packed tile size: width exponent in bits [2:0], height
 *                   exponent in bits [8:6]
 *   bytes_per_tile  storage reserved per tile
 *
 * Used for both the header and the body sizes.
 */

static unsigned
panfrost_flat_size(unsigned width, unsigned height, unsigned dim,
                   unsigned bytes_per_tile)
{
   /* Unpack the two 3-bit exponents */
   unsigned exp_w = dim & 0x7;
   unsigned exp_h = (dim >> 6) & 0x7;

   /* NOTE(review): panfrost_choose_tile_size stores log2(size / 16), yet the
    * decode here uses a base of 8 pixels, so the tile counts (and therefore
    * the sizes) come out larger than a 16-pixel base would give. Kept as-is;
    * confirm against hardware behaviour before changing.
    */
   unsigned tw = 8u << exp_w;
   unsigned th = 8u << exp_h;

   /* Storage for every tile, counting partial tiles at the edges */
   unsigned raw =
      DIV_ROUND_UP(width, tw) * DIV_ROUND_UP(height, th) * bytes_per_tile;

   /* Drop the remainder below 0x200, then add the fixed 0x200 offset */
   return 0x200 + (raw - (raw % 0x200));
}
267*61046927SAndroid Build Coastguard Worker 
268*61046927SAndroid Build Coastguard Worker /* Given a hierarchy mask and a framebuffer size, compute the header size */
269*61046927SAndroid Build Coastguard Worker 
270*61046927SAndroid Build Coastguard Worker unsigned
panfrost_tiler_header_size(unsigned width,unsigned height,unsigned mask,bool hierarchy)271*61046927SAndroid Build Coastguard Worker panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask,
272*61046927SAndroid Build Coastguard Worker                            bool hierarchy)
273*61046927SAndroid Build Coastguard Worker {
274*61046927SAndroid Build Coastguard Worker    if (hierarchy)
275*61046927SAndroid Build Coastguard Worker       return panfrost_hierarchy_size(width, height, mask,
276*61046927SAndroid Build Coastguard Worker                                      HEADER_BYTES_PER_TILE);
277*61046927SAndroid Build Coastguard Worker    else
278*61046927SAndroid Build Coastguard Worker       return panfrost_flat_size(width, height, mask, HEADER_BYTES_PER_TILE);
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker /* The combined header/body is sized similarly (but it is significantly
282*61046927SAndroid Build Coastguard Worker  * larger), except that it can be empty when the tiler disabled, rather than
283*61046927SAndroid Build Coastguard Worker  * getting clamped to a minimum size.
284*61046927SAndroid Build Coastguard Worker  */
285*61046927SAndroid Build Coastguard Worker 
286*61046927SAndroid Build Coastguard Worker unsigned
panfrost_tiler_full_size(unsigned width,unsigned height,unsigned mask,bool hierarchy)287*61046927SAndroid Build Coastguard Worker panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask,
288*61046927SAndroid Build Coastguard Worker                          bool hierarchy)
289*61046927SAndroid Build Coastguard Worker {
290*61046927SAndroid Build Coastguard Worker    if (hierarchy)
291*61046927SAndroid Build Coastguard Worker       return panfrost_hierarchy_size(width, height, mask, FULL_BYTES_PER_TILE);
292*61046927SAndroid Build Coastguard Worker    else
293*61046927SAndroid Build Coastguard Worker       return panfrost_flat_size(width, height, mask, FULL_BYTES_PER_TILE);
294*61046927SAndroid Build Coastguard Worker }
295*61046927SAndroid Build Coastguard Worker 
/* On GPUs without hierarchical tiling, we choose a tile size directly and
 * stuff it into the field otherwise known as hierarchy mask (not a mask).
 *
 *   width, height   framebuffer dimensions in pixels
 *   vertex_count    currently unused; reserved for a future size heuristic
 *
 * Returns log2(tile_w / 16) | (log2(tile_h / 16) << 6).
 */

static unsigned
panfrost_choose_tile_size(unsigned width, unsigned height,
                          unsigned vertex_count)
{
   /* No heuristic yet: begin with the smallest legal tile */
   unsigned best_w = 16;
   unsigned best_h = 16;

   /* Grow the tile so that roughly 63 tiles span each axis */
   best_w = MAX2(best_w, util_next_power_of_two(width / 63));
   best_h = MAX2(best_h, util_next_power_of_two(height / 63));

   /* Integer division truncates, so the estimate above can still leave more
    * than 64 tiles along an axis (e.g. a 16-pixel tile across 1025 pixels is
    * 65 tiles). Keep doubling until the 64-tile limit actually holds; sizes
    * that already satisfied it are left untouched.
    */
   while (DIV_ROUND_UP(width, best_w) > 64)
      best_w <<= 1;

   while (DIV_ROUND_UP(height, best_h) > 64)
      best_h <<= 1;

   /* Encode each dimension as its exponent relative to 16 pixels */
   unsigned exp_w = util_logbase2(best_w / 16);
   unsigned exp_h = util_logbase2(best_h / 16);

   return exp_w | (exp_h << 6);
}
321*61046927SAndroid Build Coastguard Worker 
322*61046927SAndroid Build Coastguard Worker unsigned
panfrost_choose_hierarchy_mask(unsigned width,unsigned height,unsigned vertex_count,bool hierarchy)323*61046927SAndroid Build Coastguard Worker panfrost_choose_hierarchy_mask(unsigned width, unsigned height,
324*61046927SAndroid Build Coastguard Worker                                unsigned vertex_count, bool hierarchy)
325*61046927SAndroid Build Coastguard Worker {
326*61046927SAndroid Build Coastguard Worker    /* If there is no geometry, we don't bother enabling anything */
327*61046927SAndroid Build Coastguard Worker 
328*61046927SAndroid Build Coastguard Worker    if (!vertex_count)
329*61046927SAndroid Build Coastguard Worker       return 0x00;
330*61046927SAndroid Build Coastguard Worker 
331*61046927SAndroid Build Coastguard Worker    if (!hierarchy)
332*61046927SAndroid Build Coastguard Worker       return panfrost_choose_tile_size(width, height, vertex_count);
333*61046927SAndroid Build Coastguard Worker 
334*61046927SAndroid Build Coastguard Worker    /* Heuristic: choose the largest minimum bin size such that there are an
335*61046927SAndroid Build Coastguard Worker     * average of k vertices per bin at the lowest level. This is modeled as:
336*61046927SAndroid Build Coastguard Worker     *
337*61046927SAndroid Build Coastguard Worker     *    k = vertex_count / ((fb width / bin width) * (fb height / bin height))
338*61046927SAndroid Build Coastguard Worker     *
339*61046927SAndroid Build Coastguard Worker     * Bins are square, so solving for bin size = bin width = bin height:
340*61046927SAndroid Build Coastguard Worker     *
341*61046927SAndroid Build Coastguard Worker     *    bin size = sqrt(((k) (fb width) (fb height) / vertex count))
342*61046927SAndroid Build Coastguard Worker     *
343*61046927SAndroid Build Coastguard Worker     * k = 4 represents each bin as a QUAD. If the screen is completely tiled
344*61046927SAndroid Build Coastguard Worker     * into nonoverlapping uniform power-of-two squares, then this heuristic sets
345*61046927SAndroid Build Coastguard Worker     * the bin size to the quad size, which seems like an ok choice.
346*61046927SAndroid Build Coastguard Worker     */
347*61046927SAndroid Build Coastguard Worker    unsigned k = 4;
348*61046927SAndroid Build Coastguard Worker    unsigned log2_min_bin_size =
349*61046927SAndroid Build Coastguard Worker       util_logbase2_ceil((k * width * height) / vertex_count) / 2;
350*61046927SAndroid Build Coastguard Worker 
351*61046927SAndroid Build Coastguard Worker    /* Do not use bins larger than the framebuffer. They will be empty. */
352*61046927SAndroid Build Coastguard Worker    unsigned log2_max_bin_size = util_logbase2_ceil(MAX2(width, height));
353*61046927SAndroid Build Coastguard Worker 
354*61046927SAndroid Build Coastguard Worker    /* For small framebuffers, use one big tile */
355*61046927SAndroid Build Coastguard Worker    log2_min_bin_size = MIN2(log2_min_bin_size, log2_max_bin_size);
356*61046927SAndroid Build Coastguard Worker 
357*61046927SAndroid Build Coastguard Worker    /* Clamp to valid bin sizes */
358*61046927SAndroid Build Coastguard Worker    log2_min_bin_size = CLAMP(log2_min_bin_size, MIN_TILE_SHIFT, MAX_TILE_SHIFT);
359*61046927SAndroid Build Coastguard Worker    log2_max_bin_size = CLAMP(log2_max_bin_size, MIN_TILE_SHIFT, MAX_TILE_SHIFT);
360*61046927SAndroid Build Coastguard Worker 
361*61046927SAndroid Build Coastguard Worker    /* Bin indices are numbered from 0 started with MIN_TILE_SIZE */
362*61046927SAndroid Build Coastguard Worker    unsigned min_bin_index = log2_min_bin_size - MIN_TILE_SHIFT;
363*61046927SAndroid Build Coastguard Worker    unsigned max_bin_index = log2_max_bin_size - MIN_TILE_SHIFT;
364*61046927SAndroid Build Coastguard Worker 
365*61046927SAndroid Build Coastguard Worker    /* Enable up to 8 bins starting from the heuristic selected minimum. 8
366*61046927SAndroid Build Coastguard Worker     * is the implementation specific maximum in supported Midgard devices.
367*61046927SAndroid Build Coastguard Worker     */
368*61046927SAndroid Build Coastguard Worker    unsigned mask =
369*61046927SAndroid Build Coastguard Worker       (BITFIELD_MASK(8) << min_bin_index) & BITFIELD_MASK(max_bin_index + 1);
370*61046927SAndroid Build Coastguard Worker 
371*61046927SAndroid Build Coastguard Worker    assert(mask != 0 && "too few levels");
372*61046927SAndroid Build Coastguard Worker    assert(util_bitcount(mask) <= 8 && "too many levels");
373*61046927SAndroid Build Coastguard Worker 
374*61046927SAndroid Build Coastguard Worker    return mask;
375*61046927SAndroid Build Coastguard Worker }
376