/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_edb_bview_cache.h"

#include "nil.h"
#include "nvk_device.h"
#include "nvk_descriptor_types.h"
#include "nvk_physical_device.h"

#include "util/format/u_format.h"
#include "util/hash_table.h"

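/* Hash table key for one cached HW buffer view: the pipe_format, the chunk
 * of the device VA space in which the view starts and, for RGB32 formats
 * only, the byte offset of the component at which the view begins.  The
 * struct is kept hole-free so it can be bit-cast to a pointer-sized hash
 * key.
 */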
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct bvdesc_key {
   uint16_t format;
   uint16_t chunk : 12;
   uint16_t rgb_offset : 4;
};
PRAGMA_DIAGNOSTIC_POP
static_assert(sizeof(struct bvdesc_key) == 4, "bvdesc_key has no holes");

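/* Size in bytes of the VA range covered by one HW buffer view */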
static uint64_t
view_size_B(enum pipe_format format)
{
   const uint8_t el_size_B = util_format_get_blocksize(format);
   if (util_is_power_of_two_nonzero(el_size_B)) {
      return 4ull << 30;
   } else {
      /* On Ampere (but not Turing or Maxwell for some reason), we're limited
       * to 3GB for RGB32 buffers.
       */
      assert(util_format_get_nr_components(format) == 3);
      return 3ull << 30;
   }
}

/* Stride in VA between views */
static uint64_t
view_stride_B(enum pipe_format format)
{
   return view_size_B(format) / 2;
}

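/* Consecutive views overlap by half, so any client view of at most
 * view_stride_B() bytes lies entirely inside at least one HW view.  For a
 * 4-byte format, for example, views are 4GB each at a 2GB stride; for
 * RGB32, they are 3GB each at a 1.5GB stride.
 */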
static uint32_t
view_size_el(enum pipe_format format)
{
   /* If a client view uses the last element of this chunk, it's a max-size
    * view which starts at the middle of this chunk and therefore belongs in
    * the next chunk.
    */
   return (view_size_B(format) / util_format_get_blocksize(format)) - 1;
}

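/* VA of the start of the HW view for the given chunk and format */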
static uint64_t
base_addr_for_chunk(struct nvk_device *dev, uint16_t chunk,
                    enum pipe_format format)
{
   return dev->nvkmd->va_start + chunk * view_stride_B(format);
}

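/* Chunk whose HW view has the greatest base address at or below addr */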
static uint64_t
chunk_for_addr(struct nvk_device *dev, uint64_t addr, enum pipe_format format)
{
   assert(addr >= dev->nvkmd->va_start);
   return (addr - dev->nvkmd->va_start) / view_stride_B(format);
}

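/* Creates the HW buffer view described by @key, adds it to the device's
 * image descriptor table, and records the resulting descriptor index in the
 * cache under the bit-cast key.
 */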
static VkResult
nvk_edb_bview_cache_add_bview(struct nvk_device *dev,
                              struct nvk_edb_bview_cache *cache,
                              struct bvdesc_key key)
{
   void *void_key = NULL;
   STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
   memcpy(&void_key, &key, sizeof(key));

   const uint64_t base_addr =
      base_addr_for_chunk(dev, key.chunk, key.format) + key.rgb_offset;

   uint32_t size_el = view_size_el(key.format);

   /* Clamp the final views so they don't run past the end of the VA space */
   const uint8_t el_size_B = util_format_get_blocksize(key.format);
   if (base_addr + (uint64_t)size_el * el_size_B > dev->nvkmd->va_end) {
      const uint64_t size_B = dev->nvkmd->va_end - base_addr;
      size_el = size_B / el_size_B;
   }

   uint32_t desc[8];
   nil_buffer_fill_tic(&nvk_device_physical(dev)->info, base_addr,
                       nil_format(key.format), size_el, &desc);

   uint32_t index;
   VkResult result = nvk_descriptor_table_add(dev, &dev->images,
                                              desc, sizeof(desc), &index);
   if (result != VK_SUCCESS)
      return result;

   _mesa_hash_table_insert(cache->cache, void_key, (void *)(uintptr_t)index);

   return VK_SUCCESS;
}

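/* Returns the descriptor index cached for @key, or 0 if none exists */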
static uint32_t
nvk_edb_bview_cache_lookup_bview(struct nvk_device *dev,
                                 struct nvk_edb_bview_cache *cache,
                                 struct bvdesc_key key)
{
   void *void_key = NULL;
   STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
   memcpy(&void_key, &key, sizeof(key));

   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, void_key);
   if (entry != NULL) {
      return (uintptr_t)entry->data;
   } else {
      return 0;
   }
}

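/* Populates the cache up front with a HW buffer view for every supported
 * buffer format and every chunk of the device's VA space, so descriptor
 * lookups never have to allocate.  RGB32 formats get one view per component
 * offset since client views of those formats may start at any component
 * boundary.
 */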
VkResult
nvk_edb_bview_cache_init(struct nvk_device *dev,
                         struct nvk_edb_bview_cache *cache)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   cache->cache = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   if (cache->cache == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t format = 0; format < PIPE_FORMAT_COUNT; format++) {
      if (!nil_format_supports_buffer(&pdev->info, format))
         continue;

      const uint8_t el_size_B = util_format_get_blocksize(format);

      for (uint16_t chunk = 0;; chunk++) {
         if (base_addr_for_chunk(dev, chunk, format) >= dev->nvkmd->va_end)
            break;

         assert(format <= UINT16_MAX);
         assert(chunk < (1u << 12));

         if (!util_is_power_of_two_nonzero(el_size_B)) {
            assert(util_format_get_nr_components(format) == 3);
            assert(el_size_B % 3 == 0);
            const uint8_t chan_size_B = el_size_B / 3;
            for (uint8_t chan = 0; chan < 3; chan++) {
               struct bvdesc_key key = {
                  .format = format,
                  .chunk = chunk,
                  .rgb_offset = chan * chan_size_B,
               };
               result = nvk_edb_bview_cache_add_bview(dev, cache, key);
               if (result != VK_SUCCESS)
                  goto fail;
            }
         } else {
            struct bvdesc_key key = {
               .format = format,
               .chunk = chunk,
            };
            result = nvk_edb_bview_cache_add_bview(dev, cache, key);
            if (result != VK_SUCCESS)
               goto fail;
         }
      }
   }

   return VK_SUCCESS;

fail:
   _mesa_hash_table_destroy(cache->cache, NULL);
   return result;
}

void
nvk_edb_bview_cache_finish(struct nvk_device *dev,
                           struct nvk_edb_bview_cache *cache)
{
   /* We don't bother freeing the descriptors as those will be cleaned up
    * automatically when the device is destroyed.
    */
   if (cache->cache)
      _mesa_hash_table_destroy(cache->cache, NULL);
}

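/* Returns a descriptor for a client buffer view with the given base address,
 * size, and format.  For example, with a 4-byte format such as R32_UINT, a
 * client view starting at va_start + 5GB lands in chunk 2 (5GB / 2GB
 * stride), whose HW view begins at va_start + 4GB, so offset_el =
 * 1GB / 4B = 0x10000000 elements.
 */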
struct nvk_edb_buffer_view_descriptor
nvk_edb_bview_cache_get_descriptor(struct nvk_device *dev,
                                   struct nvk_edb_bview_cache *cache,
                                   uint64_t base_addr, uint64_t size_B,
                                   enum pipe_format format)
{
   /* The actual hardware limit for buffer image/texture descriptors is 4GB
    * regardless of format.  This cache works by covering the address space
    * with 4GB buffer descriptors at 2GB offsets.  In order for this to work
    * properly, the size of the client's buffer view must be at most 2GB.
    */
   assert(size_B <= view_stride_B(format));

   const uint8_t el_size_B = util_format_get_blocksize(format);
   const uint64_t size_el = size_B / el_size_B;

   const uint64_t chunk = chunk_for_addr(dev, base_addr, format);
   const uint64_t desc_base_addr = base_addr_for_chunk(dev, chunk, format);
   const uint32_t offset_B = base_addr - desc_base_addr;

   const uint32_t offset_el = offset_B / el_size_B;

   uint16_t rgb_offset = 0;
   if (!util_is_power_of_two_nonzero(el_size_B)) {
      assert(util_format_get_nr_components(format) == 3);
      assert(el_size_B % 3 == 0);
      rgb_offset = offset_B % el_size_B;
   } else {
      assert(offset_B % el_size_B == 0);
   }

   assert(offset_el + size_el > offset_el);
   assert(offset_el + size_el <= view_size_el(format));

   assert(format <= UINT16_MAX);
   assert(chunk < (1u << 12));
   assert(rgb_offset < (1u << 4));
   const struct bvdesc_key key = {
      .format = format,
      .chunk = chunk,
      .rgb_offset = rgb_offset,
   };
   uint32_t index = nvk_edb_bview_cache_lookup_bview(dev, cache, key);

   uint32_t oob_alpha;
   if (util_format_has_alpha(format)) {
      /* An OOB access reads as if the texture data were 0, so an RGBA
       * format returns (0, 0, 0, 0) out-of-bounds.
       */
      oob_alpha = 0;
   } else if (util_format_is_pure_integer(format)) {
      /* An OOB access reads 0 texture data which then gets extended to
       * (0, 0, 0, 1)
       */
      oob_alpha = 1;
   } else {
      /* An OOB access reads 0 texture data which then gets extended to
       * (0.0, 0.0, 0.0, 1.0)
       */
      oob_alpha = 0x3f800000;
   }

   return (struct nvk_edb_buffer_view_descriptor) {
      .index = index,
      .offset_el = offset_el,
      .size_el = size_el,
      .oob_alpha = oob_alpha,
   };
}