xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/nodearray.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright (C) 2021 Icecream95
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker  *
11*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker  * Software.
14*61046927SAndroid Build Coastguard Worker  *
15*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20*61046927SAndroid Build Coastguard Worker  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21*61046927SAndroid Build Coastguard Worker  * SOFTWARE.
22*61046927SAndroid Build Coastguard Worker  */
23*61046927SAndroid Build Coastguard Worker 
24*61046927SAndroid Build Coastguard Worker /* A nodearray is an array type that is either sparse or dense, depending on
25*61046927SAndroid Build Coastguard Worker  * the number of elements.
26*61046927SAndroid Build Coastguard Worker  *
27*61046927SAndroid Build Coastguard Worker  * When the number of elements is over a threshold (max_sparse), the dense mode
28*61046927SAndroid Build Coastguard Worker  * is used, and the nodearray is simply a container for an array.
29*61046927SAndroid Build Coastguard Worker  *
30*61046927SAndroid Build Coastguard Worker  * In sparse mode, the array has elements with a 24-bit node index and a value.
31*61046927SAndroid Build Coastguard Worker  * The nodes are always sorted, so that a binary search can be used to find
32*61046927SAndroid Build Coastguard Worker  * elements. Nonexistent elements are treated as zero.
33*61046927SAndroid Build Coastguard Worker  *
34*61046927SAndroid Build Coastguard Worker  * Function names follow ARM instruction names: orr does *elem |= value.
35*61046927SAndroid Build Coastguard Worker  *
36*61046927SAndroid Build Coastguard Worker  * Although it's probably already fast enough, the datastructure could be sped
37*61046927SAndroid Build Coastguard Worker  * up a lot, especially when NEON is available, by making the sparse mode store
38*61046927SAndroid Build Coastguard Worker  * sixteen adjacent values, so that adding new keys also allocates nearby keys,
39*61046927SAndroid Build Coastguard Worker  * and to allow for vectorising iteration, as can be done when in the dense
40*61046927SAndroid Build Coastguard Worker  * mode.
41*61046927SAndroid Build Coastguard Worker  */
42*61046927SAndroid Build Coastguard Worker 
43*61046927SAndroid Build Coastguard Worker #ifndef __BIFROST_NODEARRAY_H
44*61046927SAndroid Build Coastguard Worker #define __BIFROST_NODEARRAY_H
45*61046927SAndroid Build Coastguard Worker 
46*61046927SAndroid Build Coastguard Worker #include <stdint.h>
47*61046927SAndroid Build Coastguard Worker 
48*61046927SAndroid Build Coastguard Worker #ifdef __cplusplus
49*61046927SAndroid Build Coastguard Worker extern "C" {
50*61046927SAndroid Build Coastguard Worker #endif
51*61046927SAndroid Build Coastguard Worker 
52*61046927SAndroid Build Coastguard Worker /* A value that may be stored in a nodearray element, used directly for dense
53*61046927SAndroid Build Coastguard Worker  * elements and included into sparse elements.
54*61046927SAndroid Build Coastguard Worker  */
55*61046927SAndroid Build Coastguard Worker typedef uint16_t nodearray_value;
56*61046927SAndroid Build Coastguard Worker 
57*61046927SAndroid Build Coastguard Worker #define NODEARRAY_MAX_VALUE 0xffff
58*61046927SAndroid Build Coastguard Worker 
59*61046927SAndroid Build Coastguard Worker /* Type storing sparse nodearray elements, consisting of a nodearray_value at
60*61046927SAndroid Build Coastguard Worker  * the bottom and a nodearray_key at the top.
61*61046927SAndroid Build Coastguard Worker  */
62*61046927SAndroid Build Coastguard Worker typedef uint64_t nodearray_sparse;
63*61046927SAndroid Build Coastguard Worker 
64*61046927SAndroid Build Coastguard Worker typedef struct {
65*61046927SAndroid Build Coastguard Worker    union {
66*61046927SAndroid Build Coastguard Worker       nodearray_sparse *sparse;
67*61046927SAndroid Build Coastguard Worker       nodearray_value *dense;
68*61046927SAndroid Build Coastguard Worker    };
69*61046927SAndroid Build Coastguard Worker    unsigned size;
70*61046927SAndroid Build Coastguard Worker    unsigned sparse_capacity;
71*61046927SAndroid Build Coastguard Worker } nodearray;
72*61046927SAndroid Build Coastguard Worker 
73*61046927SAndroid Build Coastguard Worker /* Align sizes to 16-bytes for SIMD purposes */
74*61046927SAndroid Build Coastguard Worker #define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
75*61046927SAndroid Build Coastguard Worker 
76*61046927SAndroid Build Coastguard Worker #define nodearray_sparse_foreach(buf, elem)                                    \
77*61046927SAndroid Build Coastguard Worker    for (nodearray_sparse *elem = (buf)->sparse;                                \
78*61046927SAndroid Build Coastguard Worker         elem < (buf)->sparse + (buf)->size; elem++)
79*61046927SAndroid Build Coastguard Worker 
80*61046927SAndroid Build Coastguard Worker #define nodearray_dense_foreach(buf, elem)                                     \
81*61046927SAndroid Build Coastguard Worker    for (nodearray_value *elem = (buf)->dense;                                  \
82*61046927SAndroid Build Coastguard Worker         elem < (buf)->dense + (buf)->size; elem++)
83*61046927SAndroid Build Coastguard Worker 
84*61046927SAndroid Build Coastguard Worker #define nodearray_dense_foreach_64(buf, elem)                                  \
85*61046927SAndroid Build Coastguard Worker    for (uint64_t *elem = (uint64_t *)(buf)->dense;                             \
86*61046927SAndroid Build Coastguard Worker         (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
87*61046927SAndroid Build Coastguard Worker 
88*61046927SAndroid Build Coastguard Worker static inline bool
nodearray_is_sparse(const nodearray * a)89*61046927SAndroid Build Coastguard Worker nodearray_is_sparse(const nodearray *a)
90*61046927SAndroid Build Coastguard Worker {
91*61046927SAndroid Build Coastguard Worker    return a->sparse_capacity != ~0U;
92*61046927SAndroid Build Coastguard Worker }
93*61046927SAndroid Build Coastguard Worker 
94*61046927SAndroid Build Coastguard Worker static inline void
nodearray_init(nodearray * a)95*61046927SAndroid Build Coastguard Worker nodearray_init(nodearray *a)
96*61046927SAndroid Build Coastguard Worker {
97*61046927SAndroid Build Coastguard Worker    memset(a, 0, sizeof(nodearray));
98*61046927SAndroid Build Coastguard Worker }
99*61046927SAndroid Build Coastguard Worker 
100*61046927SAndroid Build Coastguard Worker static inline void
nodearray_reset(nodearray * a)101*61046927SAndroid Build Coastguard Worker nodearray_reset(nodearray *a)
102*61046927SAndroid Build Coastguard Worker {
103*61046927SAndroid Build Coastguard Worker    free(a->sparse);
104*61046927SAndroid Build Coastguard Worker    nodearray_init(a);
105*61046927SAndroid Build Coastguard Worker }
106*61046927SAndroid Build Coastguard Worker 
107*61046927SAndroid Build Coastguard Worker static inline nodearray_sparse
nodearray_encode(unsigned key,nodearray_value value)108*61046927SAndroid Build Coastguard Worker nodearray_encode(unsigned key, nodearray_value value)
109*61046927SAndroid Build Coastguard Worker {
110*61046927SAndroid Build Coastguard Worker    static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
111*61046927SAndroid Build Coastguard Worker    return ((nodearray_sparse)key << 16) | value;
112*61046927SAndroid Build Coastguard Worker }
113*61046927SAndroid Build Coastguard Worker 
114*61046927SAndroid Build Coastguard Worker static inline unsigned
nodearray_sparse_key(const nodearray_sparse * elem)115*61046927SAndroid Build Coastguard Worker nodearray_sparse_key(const nodearray_sparse *elem)
116*61046927SAndroid Build Coastguard Worker {
117*61046927SAndroid Build Coastguard Worker    static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
118*61046927SAndroid Build Coastguard Worker    return *elem >> 16;
119*61046927SAndroid Build Coastguard Worker }
120*61046927SAndroid Build Coastguard Worker 
121*61046927SAndroid Build Coastguard Worker static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse * elem)122*61046927SAndroid Build Coastguard Worker nodearray_sparse_value(const nodearray_sparse *elem)
123*61046927SAndroid Build Coastguard Worker {
124*61046927SAndroid Build Coastguard Worker    return *elem & NODEARRAY_MAX_VALUE;
125*61046927SAndroid Build Coastguard Worker }
126*61046927SAndroid Build Coastguard Worker 
127*61046927SAndroid Build Coastguard Worker static inline unsigned
nodearray_sparse_search(const nodearray * a,nodearray_sparse key,nodearray_sparse ** elem)128*61046927SAndroid Build Coastguard Worker nodearray_sparse_search(const nodearray *a, nodearray_sparse key,
129*61046927SAndroid Build Coastguard Worker                         nodearray_sparse **elem)
130*61046927SAndroid Build Coastguard Worker {
131*61046927SAndroid Build Coastguard Worker    assert(nodearray_is_sparse(a) && a->size);
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker    nodearray_sparse *data = a->sparse;
134*61046927SAndroid Build Coastguard Worker 
135*61046927SAndroid Build Coastguard Worker    /* Encode the key using the highest possible value, so that the
136*61046927SAndroid Build Coastguard Worker     * matching node must be encoded lower than this
137*61046927SAndroid Build Coastguard Worker     */
138*61046927SAndroid Build Coastguard Worker    nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
139*61046927SAndroid Build Coastguard Worker 
140*61046927SAndroid Build Coastguard Worker    unsigned left = 0;
141*61046927SAndroid Build Coastguard Worker    unsigned right = a->size - 1;
142*61046927SAndroid Build Coastguard Worker 
143*61046927SAndroid Build Coastguard Worker    if (data[right] <= skey)
144*61046927SAndroid Build Coastguard Worker       left = right;
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker    while (left != right) {
147*61046927SAndroid Build Coastguard Worker       /* No need to worry about overflow, we couldn't have more than
148*61046927SAndroid Build Coastguard Worker        * 2^24 elements */
149*61046927SAndroid Build Coastguard Worker       unsigned probe = (left + right + 1) / 2;
150*61046927SAndroid Build Coastguard Worker 
151*61046927SAndroid Build Coastguard Worker       if (data[probe] > skey)
152*61046927SAndroid Build Coastguard Worker          right = probe - 1;
153*61046927SAndroid Build Coastguard Worker       else
154*61046927SAndroid Build Coastguard Worker          left = probe;
155*61046927SAndroid Build Coastguard Worker    }
156*61046927SAndroid Build Coastguard Worker 
157*61046927SAndroid Build Coastguard Worker    *elem = data + left;
158*61046927SAndroid Build Coastguard Worker    return left;
159*61046927SAndroid Build Coastguard Worker }
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker static inline void
nodearray_orr(nodearray * a,unsigned key,nodearray_value value,unsigned max_sparse,unsigned max)162*61046927SAndroid Build Coastguard Worker nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
163*61046927SAndroid Build Coastguard Worker               unsigned max_sparse, unsigned max)
164*61046927SAndroid Build Coastguard Worker {
165*61046927SAndroid Build Coastguard Worker    assert(key < (1 << 24));
166*61046927SAndroid Build Coastguard Worker    assert(key < max);
167*61046927SAndroid Build Coastguard Worker 
168*61046927SAndroid Build Coastguard Worker    if (!value)
169*61046927SAndroid Build Coastguard Worker       return;
170*61046927SAndroid Build Coastguard Worker 
171*61046927SAndroid Build Coastguard Worker    if (nodearray_is_sparse(a)) {
172*61046927SAndroid Build Coastguard Worker       unsigned size = a->size;
173*61046927SAndroid Build Coastguard Worker       unsigned left = 0;
174*61046927SAndroid Build Coastguard Worker 
175*61046927SAndroid Build Coastguard Worker       if (size) {
176*61046927SAndroid Build Coastguard Worker          /* First, binary search for key */
177*61046927SAndroid Build Coastguard Worker          nodearray_sparse *elem;
178*61046927SAndroid Build Coastguard Worker          left = nodearray_sparse_search(a, key, &elem);
179*61046927SAndroid Build Coastguard Worker 
180*61046927SAndroid Build Coastguard Worker          if (nodearray_sparse_key(elem) == key) {
181*61046927SAndroid Build Coastguard Worker             *elem |= value;
182*61046927SAndroid Build Coastguard Worker             return;
183*61046927SAndroid Build Coastguard Worker          }
184*61046927SAndroid Build Coastguard Worker 
185*61046927SAndroid Build Coastguard Worker          /* We insert before `left`, so increment it if it's
186*61046927SAndroid Build Coastguard Worker           * out of order */
187*61046927SAndroid Build Coastguard Worker          if (nodearray_sparse_key(elem) < key)
188*61046927SAndroid Build Coastguard Worker             ++left;
189*61046927SAndroid Build Coastguard Worker       }
190*61046927SAndroid Build Coastguard Worker 
191*61046927SAndroid Build Coastguard Worker       if (size < max_sparse && (size + 1) < max / 4) {
192*61046927SAndroid Build Coastguard Worker          /* We didn't find it, but we know where to insert it. */
193*61046927SAndroid Build Coastguard Worker 
194*61046927SAndroid Build Coastguard Worker          nodearray_sparse *data = a->sparse;
195*61046927SAndroid Build Coastguard Worker          nodearray_sparse *data_move = data + left;
196*61046927SAndroid Build Coastguard Worker 
197*61046927SAndroid Build Coastguard Worker          bool realloc = (++a->size) > a->sparse_capacity;
198*61046927SAndroid Build Coastguard Worker 
199*61046927SAndroid Build Coastguard Worker          if (realloc) {
200*61046927SAndroid Build Coastguard Worker             a->sparse_capacity =
201*61046927SAndroid Build Coastguard Worker                MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
202*61046927SAndroid Build Coastguard Worker 
203*61046927SAndroid Build Coastguard Worker             a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity *
204*61046927SAndroid Build Coastguard Worker                                                    sizeof(nodearray_sparse));
205*61046927SAndroid Build Coastguard Worker 
206*61046927SAndroid Build Coastguard Worker             if (left)
207*61046927SAndroid Build Coastguard Worker                memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
208*61046927SAndroid Build Coastguard Worker          }
209*61046927SAndroid Build Coastguard Worker 
210*61046927SAndroid Build Coastguard Worker          nodearray_sparse *elem = a->sparse + left;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker          if (left != size)
213*61046927SAndroid Build Coastguard Worker             memmove(elem + 1, data_move,
214*61046927SAndroid Build Coastguard Worker                     (size - left) * sizeof(nodearray_sparse));
215*61046927SAndroid Build Coastguard Worker 
216*61046927SAndroid Build Coastguard Worker          *elem = nodearray_encode(key, value);
217*61046927SAndroid Build Coastguard Worker 
218*61046927SAndroid Build Coastguard Worker          if (realloc)
219*61046927SAndroid Build Coastguard Worker             free(data);
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker          return;
222*61046927SAndroid Build Coastguard Worker       }
223*61046927SAndroid Build Coastguard Worker 
224*61046927SAndroid Build Coastguard Worker       /* There are too many elements, so convert to a dense array */
225*61046927SAndroid Build Coastguard Worker       nodearray old = *a;
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker       a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max),
228*61046927SAndroid Build Coastguard Worker                                            sizeof(nodearray_value));
229*61046927SAndroid Build Coastguard Worker       a->size = max;
230*61046927SAndroid Build Coastguard Worker       a->sparse_capacity = ~0U;
231*61046927SAndroid Build Coastguard Worker 
232*61046927SAndroid Build Coastguard Worker       nodearray_value *data = a->dense;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker       nodearray_sparse_foreach(&old, x) {
235*61046927SAndroid Build Coastguard Worker          unsigned key = nodearray_sparse_key(x);
236*61046927SAndroid Build Coastguard Worker          nodearray_value value = nodearray_sparse_value(x);
237*61046927SAndroid Build Coastguard Worker 
238*61046927SAndroid Build Coastguard Worker          assert(key < max);
239*61046927SAndroid Build Coastguard Worker          data[key] = value;
240*61046927SAndroid Build Coastguard Worker       }
241*61046927SAndroid Build Coastguard Worker 
242*61046927SAndroid Build Coastguard Worker       free(old.sparse);
243*61046927SAndroid Build Coastguard Worker    }
244*61046927SAndroid Build Coastguard Worker 
245*61046927SAndroid Build Coastguard Worker    a->dense[key] |= value;
246*61046927SAndroid Build Coastguard Worker }
247*61046927SAndroid Build Coastguard Worker 
248*61046927SAndroid Build Coastguard Worker #ifdef __cplusplus
249*61046927SAndroid Build Coastguard Worker } /* extern C */
250*61046927SAndroid Build Coastguard Worker #endif
251*61046927SAndroid Build Coastguard Worker 
252*61046927SAndroid Build Coastguard Worker #endif
253