/*
 * Copyright © 2023 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan_core.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_common.h"
#include "pvr_device_info.h"
#include "pvr_job_transfer.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_log.h"

#define PVR_TRANSFER_BYTE_UNWIND_MAX 16U

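/* Cached per-shader data: the uploaded USC fragment shader, the PDS kick
 * program that launches it (plus that program's PDS-heap-relative offset),
 * and the shared register layout the shader expects.
 */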
struct pvr_transfer_frag_store_entry_data {
   pvr_dev_addr_t kick_usc_pds_offset;
   struct pvr_bo *kick_usc_pds_upload;

   struct pvr_suballoc_bo *usc_upload;
   struct pvr_tq_frag_sh_reg_layout sh_reg_layout;
};

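/* Casts a hash table entry's data pointer to the entry data type, using
 * _Generic so that the const-ness of the hash_entry pointer is preserved.
 */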
#define to_pvr_entry_data(_entry) \
   _Generic((_entry), \
      struct hash_entry *: (struct pvr_transfer_frag_store_entry_data *)((_entry)->data), \
      const struct hash_entry *: (const struct pvr_transfer_frag_store_entry_data *)((_entry)->data))

VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
                                      struct pvr_transfer_frag_store *store)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   *store = (struct pvr_transfer_frag_store){
      .max_multisample = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 1U),
      .hash_table = _mesa_hash_table_create_u32_keys(NULL),
   };

   if (!store->hash_table)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   return VK_SUCCESS;
}

/**
 * \brief Returns a key based on shader properties.
 *
 * Returns a key that can be used to uniquely identify a transfer fragment
 * shader based on the provided shader properties.
 *
 * Make sure that the unused parts of shader_props are memset to 0. Otherwise
 * those bits may end up in the key as uninitialized data and fail to match
 * the key for the same shader.
 */
static uint32_t pvr_transfer_frag_shader_key(
   uint32_t max_multisample,
   const struct pvr_tq_shader_properties *shader_props)
{
   const struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
   uint32_t resolve_op_num = max_multisample + PVR_RESOLVE_SAMPLE0;

   uint32_t num_layers_bits = util_logbase2_ceil(PVR_TRANSFER_MAX_LAYERS + 1U);
   uint32_t layer_float_bits = util_logbase2_ceil(PVR_INT_COORD_SET_FLOATS_NUM);
   uint32_t pixel_src_bits = util_logbase2_ceil(PVR_TRANSFER_PBE_PIXEL_SRC_NUM);
   uint32_t byte_unwind_bits = util_logbase2_ceil(PVR_TRANSFER_BYTE_UNWIND_MAX);
   uint32_t resolve_op_bits = util_logbase2_ceil(resolve_op_num);
   uint32_t sample_cnt_bits = util_last_bit(util_logbase2(max_multisample));
   uint32_t hash = 0U;

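   /* In debug builds shift_hash() also tracks the total number of bits
    * shifted in, asserting that the packed key still fits in 32 bits.
    */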
#if MESA_DEBUG
   uint32_t max_shift = 0U;
#   define shift_hash(hash, num)   \
      do {                         \
         max_shift += (num);       \
         assert(max_shift <= 32U); \
                                   \
         (hash) <<= (num);         \
      } while (0U)
#else
#   define shift_hash(hash, num) hash <<= (num)
#endif

   /* Hash layer info. */

   shift_hash(hash, layer_float_bits);
   hash |= (uint32_t)shader_props->layer_props.layer_floats;

   shift_hash(hash, 1U);
   hash |= layer->sample;

   shift_hash(hash, 1U);
   hash |= (uint32_t)false;

   shift_hash(hash, 1U);
   hash |= (uint32_t)false;

   shift_hash(hash, pixel_src_bits);
   hash |= (uint32_t)layer->pbe_format;

   shift_hash(hash, resolve_op_bits);
   hash |= (uint32_t)layer->resolve_op;

   assert(util_is_power_of_two_nonzero(layer->sample_count));
   shift_hash(hash, sample_cnt_bits);
   hash |= (uint32_t)util_logbase2(layer->sample_count);

   shift_hash(hash, 1U);
   hash |= (uint32_t)layer->msaa;

   shift_hash(hash, byte_unwind_bits);
   hash |= layer->byte_unwind;

   shift_hash(hash, 1U);
   hash |= (uint32_t)layer->linear;

   /* End layer info. */

   shift_hash(hash, 1U);
   hash |= (uint32_t)shader_props->full_rate;

   shift_hash(hash, 1U);
   hash |= (uint32_t)shader_props->iterated;

   shift_hash(hash, 1U);
   hash |= (uint32_t)shader_props->pick_component;

   shift_hash(hash, num_layers_bits);
   /* Just 1 layer. */
   hash |= 1;

   shift_hash(hash, 3U);
   /* Alpha type: none. */
   hash |= 0;

#undef shift_hash

   return hash;
}

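/* The hash table stores the u32 key directly in its pointer-sized key slot. */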
#define to_hash_table_key(_key) ((void *)(uintptr_t)(_key))

static VkResult pvr_transfer_frag_store_entry_data_compile(
   struct pvr_device *device,
   struct pvr_transfer_frag_store_entry_data *const entry_data,
   const struct pvr_tq_shader_properties *shader_props,
   uint32_t *const num_usc_temps_out)
{
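   /* The descriptor member offsets are in bytes; divide by 4 to express them
    * as dword offsets, which is how the shared register layout counts them.
    */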
   const uint32_t image_desc_offset =
      offsetof(struct pvr_combined_image_sampler_descriptor, image) / 4;
   const uint32_t sampler_desc_offset =
      offsetof(struct pvr_combined_image_sampler_descriptor, sampler) / 4;

   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);

   struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
   uint32_t next_free_sh_reg = 0;
   struct util_dynarray shader;
   VkResult result;

   /* TODO: Allocate all combined image samplers if needed? Otherwise change
    * the array to a single descriptor.
    */
   sh_reg_layout->combined_image_samplers.offsets[0].image =
      next_free_sh_reg + image_desc_offset;
   sh_reg_layout->combined_image_samplers.offsets[0].sampler =
      next_free_sh_reg + sampler_desc_offset;
   sh_reg_layout->combined_image_samplers.count = 1;
   next_free_sh_reg += sizeof(struct pvr_combined_image_sampler_descriptor) / 4;

   /* TODO: Handle dynamic_const_regs used for PVR_INT_COORD_SET_FLOATS_{4,6},
    * Z position, texel unwind, etc. when the compiler adds support for them.
    */
   sh_reg_layout->dynamic_consts.offset = next_free_sh_reg;
   sh_reg_layout->dynamic_consts.count = 0;

   sh_reg_layout->driver_total = next_free_sh_reg;

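   /* Generate the transfer queue fragment shader for these properties and
    * upload it to the USC heap, aligned to the SLC cache line size.
    */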
   pvr_uscgen_tq_frag(shader_props,
                      &entry_data->sh_reg_layout,
                      num_usc_temps_out,
                      &shader);

   result = pvr_gpu_upload_usc(device,
                               util_dynarray_begin(&shader),
                               util_dynarray_num_elements(&shader, uint8_t),
                               cache_line_size,
                               &entry_data->usc_upload);
   util_dynarray_fini(&shader);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static VkResult pvr_transfer_frag_store_entry_data_create(
   struct pvr_device *device,
   struct pvr_transfer_frag_store *store,
   const struct pvr_tq_shader_properties *shader_props,
   const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
{
   struct pvr_pds_kickusc_program kick_usc_pds_prog = { 0 };
   struct pvr_transfer_frag_store_entry_data *entry_data;
   pvr_dev_addr_t dev_addr;
   uint32_t num_usc_temps;
   VkResult result;

   entry_data = ralloc(store->hash_table, __typeof__(*entry_data));
   if (!entry_data)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = pvr_transfer_frag_store_entry_data_compile(device,
                                                       entry_data,
                                                       shader_props,
                                                       &num_usc_temps);
   if (result != VK_SUCCESS)
      goto err_free_entry;

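   /* The DOUTU task control is programmed with the shader address as an
    * offset from the USC heap base, so strip the heap base address here.
    */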
   dev_addr = entry_data->usc_upload->dev_addr;
   dev_addr.addr -= device->heaps.usc_heap->base_addr.addr;

   pvr_pds_setup_doutu(&kick_usc_pds_prog.usc_task_control,
                       dev_addr.addr,
                       num_usc_temps,
                       shader_props->full_rate
                          ? PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL)
                          : PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

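   /* First pass: run the PDS program generator in PDS_GENERATE_SIZES mode to
    * find out how much data and code space the kick program needs.
    */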
   pvr_pds_kick_usc(&kick_usc_pds_prog, NULL, 0U, false, PDS_GENERATE_SIZES);

   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         PVR_DW_TO_BYTES(kick_usc_pds_prog.data_size +
                                         kick_usc_pds_prog.code_size),
                         16,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &entry_data->kick_usc_pds_upload);
   if (result != VK_SUCCESS)
      goto err_free_usc_upload;

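   /* Second pass: emit the PDS data and code segments into the CPU-mapped
    * buffer that was just allocated.
    */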
   pvr_pds_kick_usc(&kick_usc_pds_prog,
                    entry_data->kick_usc_pds_upload->bo->map,
                    0U,
                    false,
                    PDS_GENERATE_CODEDATA_SEGMENTS);

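   /* Store the kick program's offset relative to the PDS heap base; this is
    * what pvr_transfer_frag_store_get_shader_info() hands back to callers.
    */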
   dev_addr = entry_data->kick_usc_pds_upload->vma->dev_addr;
   dev_addr.addr -= device->heaps.pds_heap->base_addr.addr;
   entry_data->kick_usc_pds_offset = dev_addr;

   *entry_data_out = entry_data;

   return VK_SUCCESS;

err_free_usc_upload:
   pvr_bo_suballoc_free(entry_data->usc_upload);

err_free_entry:
   ralloc_free(entry_data);

   return result;
}

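/* Frees the entry's GPU allocations but not the entry itself. Used by
 * pvr_transfer_frag_store_fini(), where the hash table's ralloc context
 * frees all entries in bulk.
 */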
static inline void pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   pvr_bo_free(device, entry_data->kick_usc_pds_upload);
   pvr_bo_suballoc_free(entry_data->usc_upload);
}

static inline void pvr_transfer_frag_store_entry_data_destroy(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device,
                                                             entry_data);
   /* Casting away the const :( */
   ralloc_free((void *)entry_data);
}

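/* Looks up the cached entry for the given shader properties, compiling and
 * inserting a new one on a cache miss.
 */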
static VkResult pvr_transfer_frag_store_get_entry(
   struct pvr_device *device,
   struct pvr_transfer_frag_store *store,
   const struct pvr_tq_shader_properties *shader_props,
   const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
{
   const uint32_t key =
      pvr_transfer_frag_shader_key(store->max_multisample, shader_props);
   const struct hash_entry *entry;
   VkResult result;

   entry = _mesa_hash_table_search(store->hash_table, to_hash_table_key(key));
   if (!entry) {
      /* Init so that gcc stops complaining. */
      const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;

      result = pvr_transfer_frag_store_entry_data_create(device,
                                                         store,
                                                         shader_props,
                                                         &entry_data);
      if (result != VK_SUCCESS)
         return result;

      assert(entry_data);

      entry = _mesa_hash_table_insert(store->hash_table,
                                      to_hash_table_key(key),
                                      (void *)entry_data);
      if (!entry) {
         pvr_transfer_frag_store_entry_data_destroy(device, entry_data);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }

   *entry_data_out = to_pvr_entry_data(entry);

   return VK_SUCCESS;
}

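/* Returns the PDS kick program offset and shared register layout for the
 * transfer fragment shader matching the given properties.
 */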
VkResult pvr_transfer_frag_store_get_shader_info(
   struct pvr_device *device,
   struct pvr_transfer_frag_store *store,
   const struct pvr_tq_shader_properties *shader_props,
   pvr_dev_addr_t *const pds_dev_addr_out,
   const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out)
{
   /* Init so that gcc stops complaining. */
   const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
   VkResult result;

   result = pvr_transfer_frag_store_get_entry(device,
                                              store,
                                              shader_props,
                                              &entry_data);
   if (result != VK_SUCCESS)
      return result;

   *pds_dev_addr_out = entry_data->kick_usc_pds_offset;
   *reg_layout_out = &entry_data->sh_reg_layout;

   return VK_SUCCESS;
}

void pvr_transfer_frag_store_fini(struct pvr_device *device,
                                  struct pvr_transfer_frag_store *store)
{
   hash_table_foreach_remove(store->hash_table, entry)
   {
      /* ralloc_free() in _mesa_hash_table_destroy() will free each entry's
       * memory, so don't waste extra time freeing and unlinking them one by
       * one here.
       */
      pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
         device,
         to_pvr_entry_data(entry));
   }

   _mesa_hash_table_destroy(store->hash_table, NULL);
}