/* * Copyright (C) 2021 Icecream95 * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* A nodearray is an array type that is either sparse or dense, depending on * the number of elements. * * When the number of elements is over a threshold (max_sparse), the dense mode * is used, and the nodearray is simply a container for an array. * * In sparse mode, the array has elements with a 24-bit node index and a value. * The nodes are always sorted, so that a binary search can be used to find * elements. Nonexistent elements are treated as zero. * * Function names follow ARM instruction names: orr does *elem |= value. * * Although it's probably already fast enough, the datastructure could be sped * up a lot, especially when NEON is available, by making the sparse mode store * sixteen adjacent values, so that adding new keys also allocates nearby keys, * and to allow for vectorising iteration, as can be done when in the dense * mode. */ #ifndef __BIFROST_NODEARRAY_H #define __BIFROST_NODEARRAY_H #include #ifdef __cplusplus extern "C" { #endif /* A value that may be stored in a nodearray element, used directly for dense * elements and included into sparse elements. */ typedef uint16_t nodearray_value; #define NODEARRAY_MAX_VALUE 0xffff /* Type storing sparse nodearray elements, consisting of a nodearray_value at * the bottom and a nodearray_key at the top. */ typedef uint64_t nodearray_sparse; typedef struct { union { nodearray_sparse *sparse; nodearray_value *dense; }; unsigned size; unsigned sparse_capacity; } nodearray; /* Align sizes to 16-bytes for SIMD purposes */ #define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16) #define nodearray_sparse_foreach(buf, elem) \ for (nodearray_sparse *elem = (buf)->sparse; \ elem < (buf)->sparse + (buf)->size; elem++) #define nodearray_dense_foreach(buf, elem) \ for (nodearray_value *elem = (buf)->dense; \ elem < (buf)->dense + (buf)->size; elem++) #define nodearray_dense_foreach_64(buf, elem) \ for (uint64_t *elem = (uint64_t *)(buf)->dense; \ (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++) static inline bool nodearray_is_sparse(const nodearray *a) { return a->sparse_capacity != ~0U; } static inline void nodearray_init(nodearray *a) { memset(a, 0, sizeof(nodearray)); } static inline void nodearray_reset(nodearray *a) { free(a->sparse); nodearray_init(a); } static inline nodearray_sparse nodearray_encode(unsigned key, nodearray_value value) { static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); return ((nodearray_sparse)key << 16) | value; } static inline unsigned nodearray_sparse_key(const nodearray_sparse *elem) { static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); return *elem >> 16; } static inline nodearray_value nodearray_sparse_value(const nodearray_sparse *elem) { return *elem & NODEARRAY_MAX_VALUE; } static inline unsigned nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem) { assert(nodearray_is_sparse(a) && a->size); nodearray_sparse *data = a->sparse; /* Encode the key using the highest possible value, so that the * matching node must be encoded lower than this */ nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE); unsigned left = 0; unsigned right = a->size - 1; if (data[right] <= skey) left = right; while (left != right) { /* No need to worry about overflow, we couldn't have more than * 2^24 elements */ unsigned probe = (left + right + 1) / 2; if (data[probe] > skey) right = probe - 1; else left = probe; } *elem = data + left; return left; } static inline void nodearray_orr(nodearray *a, unsigned key, nodearray_value value, unsigned max_sparse, unsigned max) { assert(key < (1 << 24)); assert(key < max); if (!value) return; if (nodearray_is_sparse(a)) { unsigned size = a->size; unsigned left = 0; if (size) { /* First, binary search for key */ nodearray_sparse *elem; left = nodearray_sparse_search(a, key, &elem); if (nodearray_sparse_key(elem) == key) { *elem |= value; return; } /* We insert before `left`, so increment it if it's * out of order */ if (nodearray_sparse_key(elem) < key) ++left; } if (size < max_sparse && (size + 1) < max / 4) { /* We didn't find it, but we know where to insert it. */ nodearray_sparse *data = a->sparse; nodearray_sparse *data_move = data + left; bool realloc = (++a->size) > a->sparse_capacity; if (realloc) { a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4); a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse)); if (left) memcpy(a->sparse, data, left * sizeof(nodearray_sparse)); } nodearray_sparse *elem = a->sparse + left; if (left != size) memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse)); *elem = nodearray_encode(key, value); if (realloc) free(data); return; } /* There are too many elements, so convert to a dense array */ nodearray old = *a; a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value)); a->size = max; a->sparse_capacity = ~0U; nodearray_value *data = a->dense; nodearray_sparse_foreach(&old, x) { unsigned key = nodearray_sparse_key(x); nodearray_value value = nodearray_sparse_value(x); assert(key < max); data[key] = value; } free(old.sparse); } a->dense[key] |= value; } #ifdef __cplusplus } /* extern C */ #endif #endif