1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "nvk_edb_bview_cache.h"
7
8 #include "nil.h"
9 #include "nvk_device.h"
10 #include "nvk_descriptor_types.h"
11 #include "nvk_physical_device.h"
12
13 #include "util/format/u_format.h"
14 #include "util/hash_table.h"
15
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
/* Key identifying one cached buffer view descriptor.
 *
 * Packed into exactly 4 bytes so the whole key can be smuggled inside the
 * hash table's void* key (it is memcpy'd into a pointer before insert and
 * lookup).
 */
struct bvdesc_key {
   uint16_t format;         /* enum pipe_format of the view */
   uint16_t chunk : 12;     /* VA chunk: addr = va_start + chunk * view_stride_B() */
   uint16_t rgb_offset : 4; /* sub-element byte offset; nonzero only for RGB32 */
};
PRAGMA_DIAGNOSTIC_POP
static_assert(sizeof(struct bvdesc_key) == 4, "bvdesc_key has no holes");
25
26 static uint64_t
view_size_B(enum pipe_format format)27 view_size_B(enum pipe_format format)
28 {
29 const uint8_t el_size_B = util_format_get_blocksize(format);
30 if (util_is_power_of_two_nonzero(el_size_B)) {
31 return 4ull << 30;
32 } else {
33 /* On Ampere (but not Turing or Maxwell for some reason), we're limited
34 * to 3GB for RGB32 buffers.
35 */
36 assert(util_format_get_nr_components(format) == 3);
37 return 3ull << 30;
38 }
39 }
40
41 /* Stride in VA between views */
42 static uint64_t
view_stride_B(enum pipe_format format)43 view_stride_B(enum pipe_format format)
44 {
45 return view_size_B(format) / 2;
46 }
47
48 static uint32_t
view_size_el(enum pipe_format format)49 view_size_el(enum pipe_format format)
50 {
51 /* If someone uses the last element of this chunk, then they're a max-sized
52 * client view which starts at the middle of this chunk and therefore
53 * should be in the next chunk.
54 */
55 return (view_size_B(format) / util_format_get_blocksize(format)) - 1;
56 }
57
58 static uint64_t
base_addr_for_chunk(struct nvk_device * dev,uint16_t chunk,enum pipe_format format)59 base_addr_for_chunk(struct nvk_device *dev, uint16_t chunk,
60 enum pipe_format format)
61 {
62 return dev->nvkmd->va_start + chunk * view_stride_B(format);
63 }
64
65 static uint64_t
chunk_for_addr(struct nvk_device * dev,uint64_t addr,enum pipe_format format)66 chunk_for_addr(struct nvk_device *dev, uint64_t addr, enum pipe_format format)
67 {
68 assert(addr >= dev->nvkmd->va_start);
69 return (addr - dev->nvkmd->va_start) / view_stride_B(format);
70 }
71
72 static VkResult
nvk_edb_bview_cache_add_bview(struct nvk_device * dev,struct nvk_edb_bview_cache * cache,struct bvdesc_key key)73 nvk_edb_bview_cache_add_bview(struct nvk_device *dev,
74 struct nvk_edb_bview_cache *cache,
75 struct bvdesc_key key)
76 {
77 void *void_key = NULL;
78 STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
79 memcpy(&void_key, &key, sizeof(key));
80
81 const uint64_t base_addr =
82 base_addr_for_chunk(dev, key.chunk, key.format) + key.rgb_offset;
83
84 uint32_t size_el = view_size_el(key.format);
85
86 const uint8_t el_size_B = util_format_get_blocksize(key.format);
87 if (base_addr + (uint64_t)size_el * el_size_B > dev->nvkmd->va_end) {
88 const uint64_t size_B = dev->nvkmd->va_end - base_addr;
89 size_el = size_B / el_size_B;
90 }
91
92 uint32_t desc[8];
93 nil_buffer_fill_tic(&nvk_device_physical(dev)->info, base_addr,
94 nil_format(key.format), size_el, &desc);
95
96 uint32_t index;
97 VkResult result = nvk_descriptor_table_add(dev, &dev->images,
98 desc, sizeof(desc), &index);
99 if (result != VK_SUCCESS)
100 return result;
101
102 _mesa_hash_table_insert(cache->cache, void_key, (void *)(uintptr_t)index);
103
104 return VK_SUCCESS;
105 }
106
107 static uint32_t
nvk_edb_bview_cache_lookup_bview(struct nvk_device * dev,struct nvk_edb_bview_cache * cache,struct bvdesc_key key)108 nvk_edb_bview_cache_lookup_bview(struct nvk_device *dev,
109 struct nvk_edb_bview_cache *cache,
110 struct bvdesc_key key)
111 {
112 void *void_key = NULL;
113 STATIC_ASSERT(sizeof(key) <= sizeof(void_key));
114 memcpy(&void_key, &key, sizeof(key));
115
116 struct hash_entry *entry = _mesa_hash_table_search(cache->cache, void_key);
117 if (entry != NULL) {
118 return (uintptr_t)entry->data;
119 } else {
120 return 0;
121 }
122 }
123
124 VkResult
nvk_edb_bview_cache_init(struct nvk_device * dev,struct nvk_edb_bview_cache * cache)125 nvk_edb_bview_cache_init(struct nvk_device *dev,
126 struct nvk_edb_bview_cache *cache)
127 {
128 struct nvk_physical_device *pdev = nvk_device_physical(dev);
129 VkResult result;
130
131 cache->cache = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
132 _mesa_key_pointer_equal);
133 if (cache->cache == NULL)
134 return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
135
136 for (uint32_t format = 0; format < PIPE_FORMAT_COUNT; format++) {
137 if (!nil_format_supports_buffer(&pdev->info, format))
138 continue;
139
140 const uint8_t el_size_B = util_format_get_blocksize(format);
141
142 for (uint16_t chunk = 0;; chunk++) {
143 if (base_addr_for_chunk(dev, chunk, format) >= dev->nvkmd->va_end)
144 break;
145
146 assert(format <= UINT16_MAX);
147 assert(chunk < (1u << 12));
148
149 if (!util_is_power_of_two_nonzero(el_size_B)) {
150 assert(util_format_get_nr_components(format) == 3);
151 assert(el_size_B % 3 == 0);
152 const uint8_t chan_size_B = el_size_B / 3;
153 for (uint8_t chan = 0; chan < 3; chan++) {
154 struct bvdesc_key key = {
155 .format = format,
156 .chunk = chunk,
157 .rgb_offset = chan * chan_size_B,
158 };
159 result = nvk_edb_bview_cache_add_bview(dev, cache, key);
160 if (result != VK_SUCCESS)
161 goto fail;
162 }
163 } else {
164 struct bvdesc_key key = {
165 .format = format,
166 .chunk = chunk,
167 };
168 result = nvk_edb_bview_cache_add_bview(dev, cache, key);
169 if (result != VK_SUCCESS)
170 goto fail;
171 }
172 }
173 }
174
175 return VK_SUCCESS;
176
177 fail:
178 _mesa_hash_table_destroy(cache->cache, NULL);
179 return result;
180 }
181
182 void
nvk_edb_bview_cache_finish(struct nvk_device * dev,struct nvk_edb_bview_cache * cache)183 nvk_edb_bview_cache_finish(struct nvk_device *dev,
184 struct nvk_edb_bview_cache *cache)
185 {
186 /* We don't bother freeing the descriptors as those will be cleaned up
187 * automatically when the device is destroyed.
188 */
189 if (cache->cache)
190 _mesa_hash_table_destroy(cache->cache, NULL);
191 }
192
193 struct nvk_edb_buffer_view_descriptor
nvk_edb_bview_cache_get_descriptor(struct nvk_device * dev,struct nvk_edb_bview_cache * cache,uint64_t base_addr,uint64_t size_B,enum pipe_format format)194 nvk_edb_bview_cache_get_descriptor(struct nvk_device *dev,
195 struct nvk_edb_bview_cache *cache,
196 uint64_t base_addr, uint64_t size_B,
197 enum pipe_format format)
198 {
199 /* The actual hardware limit for buffer image/texture descriptors is 4GB
200 * regardless of format. This cache works by covering the address space
201 * with 4GB buffer descriptors at 2GB offsets. In order for this to work
202 * properly, the size if the client's buffer view must be at most 2 GB.
203 */
204 assert(size_B <= view_stride_B(format));
205
206 const uint8_t el_size_B = util_format_get_blocksize(format);
207 const uint64_t size_el = size_B / el_size_B;
208
209 const uint64_t chunk = chunk_for_addr(dev, base_addr, format);
210 const uint64_t desc_base_addr = base_addr_for_chunk(dev, chunk, format);
211 const uint32_t offset_B = base_addr - desc_base_addr;
212
213 const uint32_t offset_el = offset_B / el_size_B;
214
215 uint16_t rgb_offset = 0;
216 if (!util_is_power_of_two_nonzero(el_size_B)) {
217 assert(util_format_get_nr_components(format) == 3);
218 assert(el_size_B % 3 == 0);
219 rgb_offset = offset_B % el_size_B;
220 } else {
221 assert(offset_B % el_size_B == 0);
222 }
223
224 assert(offset_el + size_el > offset_el);
225 assert(offset_el + size_el <= view_size_el(format));
226
227 assert(format <= UINT16_MAX);
228 assert(chunk < (1u << 12));
229 assert(rgb_offset < (1u << 4));
230 const struct bvdesc_key key = {
231 .format = format,
232 .chunk = chunk,
233 .rgb_offset = rgb_offset,
234 };
235 uint32_t index = nvk_edb_bview_cache_lookup_bview(dev, cache, key);
236
237 uint32_t oob_alpha;
238 if (util_format_has_alpha(format)) {
239 /* OOB reads as if it read 0 texture data so an RGBA format reads
240 * (0, 0, 0, 0) out-of-bounds.
241 */
242 oob_alpha = 0;
243 } else if (util_format_is_pure_integer(format)) {
244 /* OOB reads 0 texture data but then gets extended by (0, 0, 0, 1) */
245 oob_alpha = 1;
246 } else {
247 /* OOB reads 0 texture data but then gets extended by
248 * (0.0, 0.0, 0.0, 1.0)
249 */
250 oob_alpha = 0x3f800000;
251 }
252
253 return (struct nvk_edb_buffer_view_descriptor) {
254 .index = index,
255 .offset_el = offset_el,
256 .size_el = size_el,
257 .oob_alpha = oob_alpha,
258 };
259 }
260