xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_transfer_frag_store.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <vulkan/vulkan_core.h>
28 
29 #include "hwdef/rogue_hw_utils.h"
30 #include "pvr_bo.h"
31 #include "pvr_common.h"
32 #include "pvr_device_info.h"
33 #include "pvr_job_transfer.h"
34 #include "pvr_pds.h"
35 #include "pvr_private.h"
36 #include "pvr_transfer_frag_store.h"
37 #include "pvr_types.h"
38 #include "pvr_uscgen.h"
39 #include "util/hash_table.h"
40 #include "util/macros.h"
41 #include "util/ralloc.h"
42 #include "util/u_dynarray.h"
43 #include "util/u_math.h"
44 #include "vk_log.h"
45 
/* Bound used to size the byte unwind field of the shader key. */
#define PVR_TRANSFER_BYTE_UNWIND_MAX 16U
47 
48 struct pvr_transfer_frag_store_entry_data {
49    pvr_dev_addr_t kick_usc_pds_offset;
50    struct pvr_bo *kick_usc_pds_upload;
51 
52    struct pvr_suballoc_bo *usc_upload;
53    struct pvr_tq_frag_sh_reg_layout sh_reg_layout;
54 };
55 
/* Gives access to the store entry data carried by a hash table entry. The
 * result is const qualified if, and only if, the entry pointer passed in is.
 */
#define to_pvr_entry_data(_entry) \
   _Generic((_entry), \
            const struct hash_entry *: \
               (const struct pvr_transfer_frag_store_entry_data *)((_entry)->data), \
            struct hash_entry *: \
               (struct pvr_transfer_frag_store_entry_data *)((_entry)->data))
60 
pvr_transfer_frag_store_init(struct pvr_device * device,struct pvr_transfer_frag_store * store)61 VkResult pvr_transfer_frag_store_init(struct pvr_device *device,
62                                       struct pvr_transfer_frag_store *store)
63 {
64    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
65 
66    *store = (struct pvr_transfer_frag_store){
67       .max_multisample = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 1U),
68       .hash_table = _mesa_hash_table_create_u32_keys(NULL),
69    };
70 
71    if (!store->hash_table)
72       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
73 
74    return VK_SUCCESS;
75 }
76 
77 /**
78  * \brief Returns a key based on shader properties.
79  *
80  * Returns a unique key that can be used to uniquely identify a transfer
81  * fragment shader based on the provided shader properties.
82  *
83  * Make sure that the non valid parts of shader_props are memset to 0. Otherwise
84  * these bits might appear in the key as uninitialized data and might not
85  * match a key for the same shader.
86  */
pvr_transfer_frag_shader_key(uint32_t max_multisample,const struct pvr_tq_shader_properties * shader_props)87 static uint32_t pvr_transfer_frag_shader_key(
88    uint32_t max_multisample,
89    const struct pvr_tq_shader_properties *shader_props)
90 {
91    const struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
92    uint32_t resolve_op_num = max_multisample + PVR_RESOLVE_SAMPLE0;
93 
94    uint32_t num_layers_bits = util_logbase2_ceil(PVR_TRANSFER_MAX_LAYERS + 1U);
95    uint32_t layer_float_bits = util_logbase2_ceil(PVR_INT_COORD_SET_FLOATS_NUM);
96    uint32_t pixel_src_bits = util_logbase2_ceil(PVR_TRANSFER_PBE_PIXEL_SRC_NUM);
97    uint32_t byte_unwind_bits = util_logbase2_ceil(PVR_TRANSFER_BYTE_UNWIND_MAX);
98    uint32_t resolve_op_bits = util_logbase2_ceil(resolve_op_num);
99    uint32_t sample_cnt_bits = util_last_bit(util_logbase2(max_multisample));
100    uint32_t hash = 0U;
101 
102 #if MESA_DEBUG
103    uint32_t max_shift = 0U;
104 #   define shift_hash(hash, num)   \
105       do {                         \
106          max_shift += (num);       \
107          assert(max_shift <= 32U); \
108                                    \
109          (hash) <<= (num);         \
110       } while (0U)
111 #else
112 #   define shift_hash(hash, num) hash <<= (num)
113 #endif
114 
115    /* Hash layer info. */
116 
117    shift_hash(hash, layer_float_bits);
118    hash |= (uint32_t)shader_props->layer_props.layer_floats;
119 
120    shift_hash(hash, 1U);
121    hash |= layer->sample;
122 
123    shift_hash(hash, 1U);
124    hash |= (uint32_t) false;
125 
126    shift_hash(hash, 1U);
127    hash |= (uint32_t) false;
128 
129    shift_hash(hash, pixel_src_bits);
130    hash |= (uint32_t)layer->pbe_format;
131 
132    shift_hash(hash, resolve_op_bits);
133    hash |= (uint32_t)layer->resolve_op;
134 
135    assert(util_is_power_of_two_nonzero(layer->sample_count));
136    shift_hash(hash, sample_cnt_bits);
137    hash |= (uint32_t)util_logbase2(layer->sample_count);
138 
139    shift_hash(hash, 1U);
140    hash |= (uint32_t)layer->msaa;
141 
142    shift_hash(hash, byte_unwind_bits);
143    hash |= layer->byte_unwind;
144 
145    shift_hash(hash, 1U);
146    hash |= (uint32_t)layer->linear;
147 
148    /* End layer info. */
149 
150    shift_hash(hash, 1U);
151    hash |= (uint32_t)shader_props->full_rate;
152 
153    shift_hash(hash, 1U);
154    hash |= (uint32_t)shader_props->iterated;
155 
156    shift_hash(hash, 1U);
157    hash |= (uint32_t)shader_props->pick_component;
158 
159    shift_hash(hash, num_layers_bits);
160    /* Just 1 layer. */
161    hash |= 1;
162 
163    shift_hash(hash, 3U);
164    /* alpha type none */
165    hash |= 0;
166 
167 #undef shift_hash
168 
169    return hash;
170 }
171 
/* Turns an integer shader key into the pointer sized key the hash table API
 * takes.
 */
#define to_hash_table_key(u32_key) ((void *)(uintptr_t)(u32_key))
173 
pvr_transfer_frag_store_entry_data_compile(struct pvr_device * device,struct pvr_transfer_frag_store_entry_data * const entry_data,const struct pvr_tq_shader_properties * shader_props,uint32_t * const num_usc_temps_out)174 static VkResult pvr_transfer_frag_store_entry_data_compile(
175    struct pvr_device *device,
176    struct pvr_transfer_frag_store_entry_data *const entry_data,
177    const struct pvr_tq_shader_properties *shader_props,
178    uint32_t *const num_usc_temps_out)
179 {
180    const uint32_t image_desc_offset =
181       offsetof(struct pvr_combined_image_sampler_descriptor, image) / 4;
182    const uint32_t sampler_desc_offset =
183       offsetof(struct pvr_combined_image_sampler_descriptor, sampler) / 4;
184 
185    const uint32_t cache_line_size =
186       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
187 
188    struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
189    uint32_t next_free_sh_reg = 0;
190    struct util_dynarray shader;
191    VkResult result;
192 
193    /* TODO: Allocate all combined image samplers if needed? Otherwise change the
194     * array to a single descriptor.
195     */
196    sh_reg_layout->combined_image_samplers.offsets[0].image =
197       next_free_sh_reg + image_desc_offset;
198    sh_reg_layout->combined_image_samplers.offsets[0].sampler =
199       next_free_sh_reg + sampler_desc_offset;
200    sh_reg_layout->combined_image_samplers.count = 1;
201    next_free_sh_reg += sizeof(struct pvr_combined_image_sampler_descriptor) / 4;
202 
203    /* TODO: Handle dynamic_const_regs used for PVR_INT_COORD_SET_FLOATS_{4,6}, Z
204     * position, texel unwind, etc. when compiler adds support for them.
205     */
206    sh_reg_layout->dynamic_consts.offset = next_free_sh_reg;
207    sh_reg_layout->dynamic_consts.count = 0;
208 
209    sh_reg_layout->driver_total = next_free_sh_reg;
210 
211    pvr_uscgen_tq_frag(shader_props,
212                       &entry_data->sh_reg_layout,
213                       num_usc_temps_out,
214                       &shader);
215 
216    result = pvr_gpu_upload_usc(device,
217                                util_dynarray_begin(&shader),
218                                util_dynarray_num_elements(&shader, uint8_t),
219                                cache_line_size,
220                                &entry_data->usc_upload);
221    util_dynarray_fini(&shader);
222    if (result != VK_SUCCESS)
223       return result;
224 
225    return VK_SUCCESS;
226 }
227 
pvr_transfer_frag_store_entry_data_create(struct pvr_device * device,struct pvr_transfer_frag_store * store,const struct pvr_tq_shader_properties * shader_props,const struct pvr_transfer_frag_store_entry_data ** const entry_data_out)228 static VkResult pvr_transfer_frag_store_entry_data_create(
229    struct pvr_device *device,
230    struct pvr_transfer_frag_store *store,
231    const struct pvr_tq_shader_properties *shader_props,
232    const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
233 {
234    struct pvr_pds_kickusc_program kick_usc_pds_prog = { 0 };
235    struct pvr_transfer_frag_store_entry_data *entry_data;
236    pvr_dev_addr_t dev_addr;
237    uint32_t num_usc_temps;
238    VkResult result;
239 
240    entry_data = ralloc(store->hash_table, __typeof__(*entry_data));
241    if (!entry_data)
242       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
243 
244    result = pvr_transfer_frag_store_entry_data_compile(device,
245                                                        entry_data,
246                                                        shader_props,
247                                                        &num_usc_temps);
248    if (result != VK_SUCCESS)
249       goto err_free_entry;
250 
251    dev_addr = entry_data->usc_upload->dev_addr;
252    dev_addr.addr -= device->heaps.usc_heap->base_addr.addr;
253 
254    pvr_pds_setup_doutu(&kick_usc_pds_prog.usc_task_control,
255                        dev_addr.addr,
256                        num_usc_temps,
257                        shader_props->full_rate
258                           ? PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL)
259                           : PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
260                        false);
261 
262    pvr_pds_kick_usc(&kick_usc_pds_prog, NULL, 0U, false, PDS_GENERATE_SIZES);
263 
264    result = pvr_bo_alloc(device,
265                          device->heaps.pds_heap,
266                          PVR_DW_TO_BYTES(kick_usc_pds_prog.data_size +
267                                          kick_usc_pds_prog.code_size),
268                          16,
269                          PVR_BO_ALLOC_FLAG_CPU_MAPPED,
270                          &entry_data->kick_usc_pds_upload);
271    if (result != VK_SUCCESS)
272       goto err_free_usc_upload;
273 
274    pvr_pds_kick_usc(&kick_usc_pds_prog,
275                     entry_data->kick_usc_pds_upload->bo->map,
276                     0U,
277                     false,
278                     PDS_GENERATE_CODEDATA_SEGMENTS);
279 
280    dev_addr = entry_data->kick_usc_pds_upload->vma->dev_addr;
281    dev_addr.addr -= device->heaps.pds_heap->base_addr.addr;
282    entry_data->kick_usc_pds_offset = dev_addr;
283 
284    *entry_data_out = entry_data;
285 
286    return VK_SUCCESS;
287 
288 err_free_usc_upload:
289    pvr_bo_suballoc_free(entry_data->usc_upload);
290 
291 err_free_entry:
292    ralloc_free(entry_data);
293 
294    return result;
295 }
296 
pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(struct pvr_device * device,const struct pvr_transfer_frag_store_entry_data * entry_data)297 static void inline pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
298    struct pvr_device *device,
299    const struct pvr_transfer_frag_store_entry_data *entry_data)
300 {
301    pvr_bo_free(device, entry_data->kick_usc_pds_upload);
302    pvr_bo_suballoc_free(entry_data->usc_upload);
303 }
304 
/**
 * \brief Destroys a store entry's data.
 *
 * Frees the device memory owned by the entry and then the entry data
 * allocation itself.
 *
 * \param[in] device     Device the entry data was created with.
 * \param[in] entry_data Entry data to destroy. Not to be used afterwards.
 */
static inline void pvr_transfer_frag_store_entry_data_destroy(
   struct pvr_device *device,
   const struct pvr_transfer_frag_store_entry_data *entry_data)
{
   pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(device,
                                                             entry_data);

   /* ralloc_free() wants a non-const pointer so the const has to be cast
    * away :(
    */
   ralloc_free((void *)entry_data);
}
314 
pvr_transfer_frag_store_get_entry(struct pvr_device * device,struct pvr_transfer_frag_store * store,const struct pvr_tq_shader_properties * shader_props,const struct pvr_transfer_frag_store_entry_data ** const entry_data_out)315 static VkResult pvr_transfer_frag_store_get_entry(
316    struct pvr_device *device,
317    struct pvr_transfer_frag_store *store,
318    const struct pvr_tq_shader_properties *shader_props,
319    const struct pvr_transfer_frag_store_entry_data **const entry_data_out)
320 {
321    const uint32_t key =
322       pvr_transfer_frag_shader_key(store->max_multisample, shader_props);
323    const struct hash_entry *entry;
324    VkResult result;
325 
326    entry = _mesa_hash_table_search(store->hash_table, to_hash_table_key(key));
327    if (!entry) {
328       /* Init so that gcc stops complaining. */
329       const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
330 
331       result = pvr_transfer_frag_store_entry_data_create(device,
332                                                          store,
333                                                          shader_props,
334                                                          &entry_data);
335       if (result != VK_SUCCESS)
336          return result;
337 
338       assert(entry_data);
339 
340       entry = _mesa_hash_table_insert(store->hash_table,
341                                       to_hash_table_key(key),
342                                       (void *)entry_data);
343       if (!entry) {
344          pvr_transfer_frag_store_entry_data_destroy(device, entry_data);
345          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
346       }
347    }
348 
349    *entry_data_out = to_pvr_entry_data(entry);
350 
351    return VK_SUCCESS;
352 }
353 
pvr_transfer_frag_store_get_shader_info(struct pvr_device * device,struct pvr_transfer_frag_store * store,const struct pvr_tq_shader_properties * shader_props,pvr_dev_addr_t * const pds_dev_addr_out,const struct pvr_tq_frag_sh_reg_layout ** const reg_layout_out)354 VkResult pvr_transfer_frag_store_get_shader_info(
355    struct pvr_device *device,
356    struct pvr_transfer_frag_store *store,
357    const struct pvr_tq_shader_properties *shader_props,
358    pvr_dev_addr_t *const pds_dev_addr_out,
359    const struct pvr_tq_frag_sh_reg_layout **const reg_layout_out)
360 {
361    /* Init so that gcc stops complaining. */
362    const struct pvr_transfer_frag_store_entry_data *entry_data = NULL;
363    VkResult result;
364 
365    result = pvr_transfer_frag_store_get_entry(device,
366                                               store,
367                                               shader_props,
368                                               &entry_data);
369    if (result != VK_SUCCESS)
370       return result;
371 
372    *pds_dev_addr_out = entry_data->kick_usc_pds_offset;
373    *reg_layout_out = &entry_data->sh_reg_layout;
374 
375    return VK_SUCCESS;
376 }
377 
pvr_transfer_frag_store_fini(struct pvr_device * device,struct pvr_transfer_frag_store * store)378 void pvr_transfer_frag_store_fini(struct pvr_device *device,
379                                   struct pvr_transfer_frag_store *store)
380 {
381    hash_table_foreach_remove(store->hash_table, entry)
382    {
383       /* ralloc_free() in _mesa_hash_table_destroy() will free each entry's
384        * memory so let's not waste extra time freeing them one by one and
385        * unliking.
386        */
387       pvr_transfer_frag_store_entry_data_destroy_no_ralloc_free(
388          device,
389          to_pvr_entry_data(entry));
390    }
391 
392    _mesa_hash_table_destroy(store->hash_table, NULL);
393 }
394