xref: /aosp_15_r20/external/mesa3d/src/panfrost/shared/test/test-tiling.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2022 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "pan_tiling.h"
25 
26 #include <gtest/gtest.h>
27 
28 /*
29  * Reference tiling algorithm, written for clarity rather than performance. See
30  * docs/drivers/panfrost.rst for details on the format.
31  */
32 
/*
 * Interleave the bits of (x ^ y) and y into a single index: bit i of (x ^ y)
 * lands in bit 2i of the result and bit i of y lands in bit 2i + 1. Both
 * coordinates must be below 16, so the result fits in 8 bits.
 */
static unsigned
u_order(unsigned x, unsigned y)
{
   assert(x < 16 && y < 16);

   const unsigned x_xor_y = x ^ y;
   unsigned index = 0;

   /* Four input bits per coordinate, two output bits per iteration */
   for (unsigned bit = 0; bit < 4; ++bit) {
      index |= ((x_xor_y >> bit) & 1u) << (2 * bit);
      index |= ((y >> bit) & 1u) << (2 * bit + 1);
   }

   return index;
}
51 
52 /* x/y are in blocks */
53 static unsigned
tiled_offset(unsigned x,unsigned y,unsigned stride,unsigned tilesize,unsigned blocksize)54 tiled_offset(unsigned x, unsigned y, unsigned stride, unsigned tilesize,
55              unsigned blocksize)
56 {
57    unsigned tile_x = x / tilesize;
58    unsigned tile_y = y / tilesize;
59 
60    unsigned x_in_tile = x % tilesize;
61    unsigned y_in_tile = y % tilesize;
62 
63    unsigned index_in_tile = u_order(x_in_tile, y_in_tile);
64 
65    unsigned row_offset = tile_y * stride;
66    unsigned col_offset = (tile_x * tilesize * tilesize) * blocksize;
67    unsigned block_offset = index_in_tile * blocksize;
68 
69    return row_offset + col_offset + block_offset;
70 }
71 
/*
 * Byte offset of block (x, y) inside a linear image whose rows are stride
 * bytes apart and whose blocks are blocksize bytes each.
 */
static unsigned
linear_offset(unsigned x, unsigned y, unsigned stride, unsigned blocksize)
{
   const unsigned row_start = y * stride;
   const unsigned within_row = x * blocksize;

   return row_start + within_row;
}
77 
78 static void
ref_access_tiled(void * dst,const void * src,unsigned region_x,unsigned region_y,unsigned w,unsigned h,uint32_t dst_stride,uint32_t src_stride,enum pipe_format format,bool dst_is_tiled)79 ref_access_tiled(void *dst, const void *src, unsigned region_x,
80                  unsigned region_y, unsigned w, unsigned h, uint32_t dst_stride,
81                  uint32_t src_stride, enum pipe_format format,
82                  bool dst_is_tiled)
83 {
84    const struct util_format_description *desc = util_format_description(format);
85    ;
86 
87    unsigned tilesize = (desc->block.width > 1) ? 4 : 16;
88    unsigned blocksize = (desc->block.bits / 8);
89 
90    unsigned w_block = w / desc->block.width;
91    unsigned h_block = h / desc->block.height;
92 
93    unsigned region_x_block = region_x / desc->block.width;
94    unsigned region_y_block = region_y / desc->block.height;
95 
96    for (unsigned linear_y_block = 0; linear_y_block < h_block;
97         ++linear_y_block) {
98       for (unsigned linear_x_block = 0; linear_x_block < w_block;
99            ++linear_x_block) {
100 
101          unsigned tiled_x_block = region_x_block + linear_x_block;
102          unsigned tiled_y_block = region_y_block + linear_y_block;
103 
104          unsigned dst_offset, src_offset;
105 
106          if (dst_is_tiled) {
107             dst_offset = tiled_offset(tiled_x_block, tiled_y_block, dst_stride,
108                                       tilesize, blocksize);
109             src_offset = linear_offset(linear_x_block, linear_y_block,
110                                        src_stride, blocksize);
111          } else {
112             dst_offset = linear_offset(linear_x_block, linear_y_block,
113                                        dst_stride, blocksize);
114             src_offset = tiled_offset(tiled_x_block, tiled_y_block, src_stride,
115                                       tilesize, blocksize);
116          }
117 
118          memcpy((uint8_t *)dst + dst_offset, (const uint8_t *)src + src_offset,
119                 desc->block.bits / 8);
120       }
121    }
122 }
123 
124 /*
125  * Helper to build test cases for tiled texture access. This test suite compares
126  * the above reference tiling algorithm to the optimized algorithm used in
127  * production.
128  */
129 static void
test(unsigned width,unsigned height,unsigned rx,unsigned ry,unsigned rw,unsigned rh,unsigned linear_stride,enum pipe_format format,bool store)130 test(unsigned width, unsigned height, unsigned rx, unsigned ry, unsigned rw,
131      unsigned rh, unsigned linear_stride, enum pipe_format format, bool store)
132 {
133    unsigned bpp = util_format_get_blocksize(format);
134    unsigned tile_height = util_format_is_compressed(format) ? 4 : 16;
135 
136    unsigned tiled_width = ALIGN_POT(width, 16);
137    unsigned tiled_height = ALIGN_POT(height, 16);
138    unsigned tiled_stride = tiled_width * tile_height * bpp;
139 
140    unsigned dst_stride = store ? tiled_stride : linear_stride;
141    unsigned src_stride = store ? linear_stride : tiled_stride;
142 
143    void *tiled = calloc(bpp, tiled_width * tiled_height);
144    void *linear = calloc(bpp, rw * linear_stride);
145    void *ref =
146       calloc(bpp, store ? (tiled_width * tiled_height) : (rw * linear_stride));
147 
148    if (store) {
149       for (unsigned i = 0; i < bpp * rw * linear_stride; ++i) {
150          ((uint8_t *)linear)[i] = (i & 0xFF);
151       }
152 
153       panfrost_store_tiled_image(tiled, linear, rx, ry, rw, rh, dst_stride,
154                                  src_stride, format);
155    } else {
156       for (unsigned i = 0; i < bpp * tiled_width * tiled_height; ++i) {
157          ((uint8_t *)tiled)[i] = (i & 0xFF);
158       }
159 
160       panfrost_load_tiled_image(linear, tiled, rx, ry, rw, rh, dst_stride,
161                                 src_stride, format);
162    }
163 
164    ref_access_tiled(ref, store ? linear : tiled, rx, ry, rw, rh, dst_stride,
165                     src_stride, format, store);
166 
167    if (store)
168       EXPECT_EQ(memcmp(ref, tiled, bpp * tiled_width * tiled_height), 0);
169    else
170       EXPECT_EQ(memcmp(ref, linear, bpp * rw * linear_stride), 0);
171 
172    free(ref);
173    free(tiled);
174    free(linear);
175 }
176 
177 static void
test_ldst(unsigned width,unsigned height,unsigned rx,unsigned ry,unsigned rw,unsigned rh,unsigned linear_stride,enum pipe_format format)178 test_ldst(unsigned width, unsigned height, unsigned rx, unsigned ry,
179           unsigned rw, unsigned rh, unsigned linear_stride,
180           enum pipe_format format)
181 {
182    test(width, height, rx, ry, rw, rh, linear_stride, format, true);
183    test(width, height, rx, ry, rw, rh, linear_stride, format, false);
184 }
185 
/*
 * Whole-image load/store of a 23x17 image with a tightly packed linear stride,
 * once per pixel size from 8 to 128 bits.
 *
 * NOTE(review): the test name looks like a typo of "RegularFormats"; it is
 * kept as-is because renaming would change the name used by gtest filters.
 */
TEST(UInterleavedTiling, RegulatFormats)
{
   static const struct {
      unsigned bytes_per_pixel;
      enum pipe_format format;
   } cases[] = {
      {1, PIPE_FORMAT_R8_UINT},            /* 8-bit */
      {2, PIPE_FORMAT_R8G8_UINT},          /* 16-bit */
      {3, PIPE_FORMAT_R8G8B8_UINT},        /* 24-bit */
      {4, PIPE_FORMAT_R32_UINT},           /* 32-bit */
      {6, PIPE_FORMAT_R16G16B16_UINT},     /* 48-bit */
      {8, PIPE_FORMAT_R32G32_UINT},        /* 64-bit */
      {12, PIPE_FORMAT_R32G32B32_UINT},    /* 96-bit */
      {16, PIPE_FORMAT_R32G32B32A32_UINT}, /* 128-bit */
   };

   for (const auto &c : cases)
      test_ldst(23, 17, 0, 0, 23, 17, 23 * c.bytes_per_pixel, c.format);
}
212 
/*
 * Whole-image load/store of a 23x17 image where the linear stride (369 pixels
 * worth of bytes) is larger than the image width, once per pixel size.
 */
TEST(UInterleavedTiling, UnpackedStrides)
{
   static const struct {
      unsigned bytes_per_pixel;
      enum pipe_format format;
   } cases[] = {
      {1, PIPE_FORMAT_R8_SINT},
      {2, PIPE_FORMAT_R8G8_SINT},
      {3, PIPE_FORMAT_R8G8B8_SINT},
      {4, PIPE_FORMAT_R32_SINT},
      {6, PIPE_FORMAT_R16G16B16_SINT},
      {8, PIPE_FORMAT_R32G32_SINT},
      {12, PIPE_FORMAT_R32G32B32_SINT},
      {16, PIPE_FORMAT_R32G32B32A32_SINT},
   };

   for (const auto &c : cases)
      test_ldst(23, 17, 0, 0, 23, 17, 369 * c.bytes_per_pixel, c.format);
}
224 
/*
 * Load/store of a 13x7 region at (3, 1) inside a 23x17 image, with an
 * unpacked linear stride, once per pixel size.
 */
TEST(UInterleavedTiling, PartialAccess)
{
   static const struct {
      unsigned bytes_per_pixel;
      enum pipe_format format;
   } cases[] = {
      {1, PIPE_FORMAT_R8_UNORM},
      {2, PIPE_FORMAT_R8G8_UNORM},
      {3, PIPE_FORMAT_R8G8B8_UNORM},
      {4, PIPE_FORMAT_R32_UNORM},
      {6, PIPE_FORMAT_R16G16B16_UNORM},
      {8, PIPE_FORMAT_R32G32_UNORM},
      {12, PIPE_FORMAT_R32G32B32_UNORM},
      {16, PIPE_FORMAT_R32G32B32A32_UNORM},
   };

   for (const auto &c : cases)
      test_ldst(23, 17, 3, 1, 13, 7, 369 * c.bytes_per_pixel, c.format);
}
236 
/*
 * Whole-image load/store of a 32x32 image for several ETC formats. The
 * region is assumed to be aligned to the format's block size.
 */
TEST(UInterleavedTiling, ETC)
{
   static const enum pipe_format formats[] = {
      PIPE_FORMAT_ETC1_RGB8,
      PIPE_FORMAT_ETC2_RGB8A1,
      PIPE_FORMAT_ETC2_RG11_SNORM,
   };

   for (enum pipe_format format : formats)
      test_ldst(32, 32, 0, 0, 32, 32, 512, format);
}
244 
/*
 * Load/store of a 16x12 region at (4, 8) inside a 32x32 image for several
 * ETC formats. The region is assumed to be aligned to the format's block
 * size.
 */
TEST(UInterleavedTiling, PartialETC)
{
   static const enum pipe_format formats[] = {
      PIPE_FORMAT_ETC1_RGB8,
      PIPE_FORMAT_ETC2_RGB8A1,
      PIPE_FORMAT_ETC2_RG11_SNORM,
   };

   for (enum pipe_format format : formats)
      test_ldst(32, 32, 4, 8, 16, 12, 512, format);
}
252 
/*
 * Whole-image load/store of a 32x32 image for several DXT formats. The
 * region is assumed to be aligned to the format's block size.
 */
TEST(UInterleavedTiling, DXT)
{
   static const enum pipe_format formats[] = {
      PIPE_FORMAT_DXT1_RGB,
      PIPE_FORMAT_DXT3_RGBA,
      PIPE_FORMAT_DXT5_RGBA,
   };

   for (enum pipe_format format : formats)
      test_ldst(32, 32, 0, 0, 32, 32, 512, format);
}
260 
/*
 * Load/store of a 16x12 region at (4, 8) inside a 32x32 image for several
 * DXT formats. The region is assumed to be aligned to the format's block
 * size.
 */
TEST(UInterleavedTiling, PartialDXT)
{
   static const enum pipe_format formats[] = {
      PIPE_FORMAT_DXT1_RGB,
      PIPE_FORMAT_DXT3_RGBA,
      PIPE_FORMAT_DXT5_RGBA,
   };

   for (enum pipe_format format : formats)
      test_ldst(32, 32, 4, 8, 16, 12, 512, format);
}
268 
/*
 * Whole-image load/store for several ASTC block sizes, each with image
 * dimensions that are a multiple of the block size. The region is assumed to
 * be aligned to the format's block size.
 */
TEST(UInterleavedTiling, ASTC)
{
   static const struct {
      unsigned width, height;
      enum pipe_format format;
   } cases[] = {
      {40, 40, PIPE_FORMAT_ASTC_4x4},
      {50, 40, PIPE_FORMAT_ASTC_5x4},
      {50, 50, PIPE_FORMAT_ASTC_5x5},
   };

   for (const auto &c : cases)
      test_ldst(c.width, c.height, 0, 0, c.width, c.height, 512, c.format);
}
276 
/*
 * Load/store of a sub-region for several ASTC block sizes. Region origin and
 * size are multiples of the block size; the region is assumed to be aligned
 * to the format's block size.
 */
TEST(UInterleavedTiling, PartialASTC)
{
   static const struct {
      unsigned width, height;
      unsigned rx, ry, rw, rh;
      enum pipe_format format;
   } cases[] = {
      {40, 40, 4, 4, 16, 8, PIPE_FORMAT_ASTC_4x4},
      {50, 40, 5, 4, 10, 8, PIPE_FORMAT_ASTC_5x4},
      {50, 50, 5, 5, 10, 10, PIPE_FORMAT_ASTC_5x5},
   };

   for (const auto &c : cases)
      test_ldst(c.width, c.height, c.rx, c.ry, c.rw, c.rh, 512, c.format);
}
284