xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_spm.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <stdint.h>
25 #include <stddef.h>
26 #include <string.h>
27 #include <vulkan/vulkan_core.h>
28 
29 #include "c11/threads.h"
30 #include "hwdef/rogue_hw_utils.h"
31 #include "pvr_bo.h"
32 #include "pvr_csb.h"
33 #include "pvr_csb_enum_helpers.h"
34 #include "pvr_device_info.h"
35 #include "pvr_formats.h"
36 #include "pvr_hw_pass.h"
37 #include "pvr_job_common.h"
38 #include "pvr_pds.h"
39 #include "pvr_private.h"
40 #include "pvr_shader_factory.h"
41 #include "pvr_spm.h"
42 #include "pvr_static_shaders.h"
43 #include "pvr_tex_state.h"
44 #include "pvr_types.h"
45 #include "pvr_uscgen.h"
46 #include "util/bitscan.h"
47 #include "util/macros.h"
48 #include "util/simple_mtx.h"
49 #include "util/u_atomic.h"
50 #include "vk_alloc.h"
51 #include "vk_log.h"
52 
/* Device-wide SPM scratch buffer, used by partial renders (PRs) to spill
 * tile data. A single buffer is shared between framebuffers (see
 * pvr_spm_scratch_buffer_get_buffer()) and kept alive by reference counting.
 */
struct pvr_spm_scratch_buffer {
   /* Reference count; manipulated with p_atomic_*() while holding the
    * store's mutex. The store's head reference counts as one.
    */
   uint32_t ref_count;
   /* Backing device memory, freed with pvr_bo_free() on last release. */
   struct pvr_bo *bo;
   /* Size of \p bo in bytes, as requested at allocation time. */
   uint64_t size;
};
58 
pvr_spm_init_scratch_buffer_store(struct pvr_device * device)59 void pvr_spm_init_scratch_buffer_store(struct pvr_device *device)
60 {
61    struct pvr_spm_scratch_buffer_store *store =
62       &device->spm_scratch_buffer_store;
63 
64    simple_mtx_init(&store->mtx, mtx_plain);
65    store->head_ref = NULL;
66 }
67 
pvr_spm_finish_scratch_buffer_store(struct pvr_device * device)68 void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device)
69 {
70    struct pvr_spm_scratch_buffer_store *store =
71       &device->spm_scratch_buffer_store;
72 
73    /* Either a framebuffer was never created so no scratch buffer was ever
74     * created or all framebuffers have been freed so only the store's reference
75     * remains.
76     */
77    assert(!store->head_ref || p_atomic_read(&store->head_ref->ref_count) == 1);
78 
79    simple_mtx_destroy(&store->mtx);
80 
81    if (store->head_ref) {
82       pvr_bo_free(device, store->head_ref->bo);
83       vk_free(&device->vk.alloc, store->head_ref);
84    }
85 }
86 
87 uint64_t
pvr_spm_scratch_buffer_calc_required_size(const struct pvr_render_pass * pass,uint32_t framebuffer_width,uint32_t framebuffer_height)88 pvr_spm_scratch_buffer_calc_required_size(const struct pvr_render_pass *pass,
89                                           uint32_t framebuffer_width,
90                                           uint32_t framebuffer_height)
91 {
92    uint64_t dwords_per_pixel;
93    uint64_t buffer_size;
94 
95    /* If we're allocating an SPM scratch buffer we'll have a minimum of 1 output
96     * reg and/or tile_buffer.
97     */
98    uint32_t nr_tile_buffers = 1;
99    uint32_t nr_output_regs = 1;
100 
101    for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
102       const struct pvr_renderpass_hwsetup_render *hw_render =
103          &pass->hw_setup->renders[i];
104 
105       nr_tile_buffers = MAX2(nr_tile_buffers, hw_render->tile_buffers_count);
106       nr_output_regs = MAX2(nr_output_regs, hw_render->output_regs_count);
107    }
108 
109    dwords_per_pixel =
110       (uint64_t)pass->max_sample_count * nr_output_regs * nr_tile_buffers;
111 
112    buffer_size = ALIGN_POT((uint64_t)framebuffer_width,
113                            PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
114    buffer_size *=
115       (uint64_t)framebuffer_height * PVR_DW_TO_BYTES(dwords_per_pixel);
116 
117    return buffer_size;
118 }
119 
120 static VkResult
pvr_spm_scratch_buffer_alloc(struct pvr_device * device,uint64_t size,struct pvr_spm_scratch_buffer ** const buffer_out)121 pvr_spm_scratch_buffer_alloc(struct pvr_device *device,
122                              uint64_t size,
123                              struct pvr_spm_scratch_buffer **const buffer_out)
124 {
125    const uint32_t cache_line_size =
126       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
127    struct pvr_spm_scratch_buffer *scratch_buffer;
128    struct pvr_bo *bo;
129    VkResult result;
130 
131    result = pvr_bo_alloc(device,
132                          device->heaps.general_heap,
133                          size,
134                          cache_line_size,
135                          0,
136                          &bo);
137    if (result != VK_SUCCESS) {
138       *buffer_out = NULL;
139       return result;
140    }
141 
142    scratch_buffer = vk_alloc(&device->vk.alloc,
143                              sizeof(*scratch_buffer),
144                              4,
145                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
146    if (!scratch_buffer) {
147       pvr_bo_free(device, bo);
148       *buffer_out = NULL;
149       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
150    }
151 
152    *scratch_buffer = (struct pvr_spm_scratch_buffer){
153       .bo = bo,
154       .size = size,
155    };
156 
157    *buffer_out = scratch_buffer;
158 
159    return VK_SUCCESS;
160 }
161 
162 static void
pvr_spm_scratch_buffer_release_locked(struct pvr_device * device,struct pvr_spm_scratch_buffer * buffer)163 pvr_spm_scratch_buffer_release_locked(struct pvr_device *device,
164                                       struct pvr_spm_scratch_buffer *buffer)
165 {
166    struct pvr_spm_scratch_buffer_store *store =
167       &device->spm_scratch_buffer_store;
168 
169    simple_mtx_assert_locked(&store->mtx);
170 
171    if (p_atomic_dec_zero(&buffer->ref_count)) {
172       pvr_bo_free(device, buffer->bo);
173       vk_free(&device->vk.alloc, buffer);
174    }
175 }
176 
pvr_spm_scratch_buffer_release(struct pvr_device * device,struct pvr_spm_scratch_buffer * buffer)177 void pvr_spm_scratch_buffer_release(struct pvr_device *device,
178                                     struct pvr_spm_scratch_buffer *buffer)
179 {
180    struct pvr_spm_scratch_buffer_store *store =
181       &device->spm_scratch_buffer_store;
182 
183    simple_mtx_lock(&store->mtx);
184 
185    pvr_spm_scratch_buffer_release_locked(device, buffer);
186 
187    simple_mtx_unlock(&store->mtx);
188 }
189 
pvr_spm_scratch_buffer_store_set_head_ref_locked(struct pvr_spm_scratch_buffer_store * store,struct pvr_spm_scratch_buffer * buffer)190 static void pvr_spm_scratch_buffer_store_set_head_ref_locked(
191    struct pvr_spm_scratch_buffer_store *store,
192    struct pvr_spm_scratch_buffer *buffer)
193 {
194    simple_mtx_assert_locked(&store->mtx);
195    assert(!store->head_ref);
196 
197    p_atomic_inc(&buffer->ref_count);
198    store->head_ref = buffer;
199 }
200 
pvr_spm_scratch_buffer_store_release_head_ref_locked(struct pvr_device * device,struct pvr_spm_scratch_buffer_store * store)201 static void pvr_spm_scratch_buffer_store_release_head_ref_locked(
202    struct pvr_device *device,
203    struct pvr_spm_scratch_buffer_store *store)
204 {
205    simple_mtx_assert_locked(&store->mtx);
206 
207    pvr_spm_scratch_buffer_release_locked(device, store->head_ref);
208 
209    store->head_ref = NULL;
210 }
211 
pvr_spm_scratch_buffer_get_buffer(struct pvr_device * device,uint64_t size,struct pvr_spm_scratch_buffer ** const buffer_out)212 VkResult pvr_spm_scratch_buffer_get_buffer(
213    struct pvr_device *device,
214    uint64_t size,
215    struct pvr_spm_scratch_buffer **const buffer_out)
216 {
217    struct pvr_spm_scratch_buffer_store *store =
218       &device->spm_scratch_buffer_store;
219    struct pvr_spm_scratch_buffer *buffer;
220 
221    simple_mtx_lock(&store->mtx);
222 
223    /* When a render requires a PR the fw will wait for other renders to end,
224     * free the PB space, unschedule any other vert/frag jobs and solely run the
225     * PR on the whole device until completion.
226     * Thus we can safely use the same scratch buffer across multiple
227     * framebuffers as the scratch buffer is only used during PRs and only one PR
228     * can ever be executed at any one time.
229     */
230    if (store->head_ref && store->head_ref->size <= size) {
231       buffer = store->head_ref;
232    } else {
233       VkResult result;
234 
235       if (store->head_ref)
236          pvr_spm_scratch_buffer_store_release_head_ref_locked(device, store);
237 
238       result = pvr_spm_scratch_buffer_alloc(device, size, &buffer);
239       if (result != VK_SUCCESS) {
240          simple_mtx_unlock(&store->mtx);
241          *buffer_out = NULL;
242 
243          return result;
244       }
245 
246       pvr_spm_scratch_buffer_store_set_head_ref_locked(store, buffer);
247    }
248 
249    p_atomic_inc(&buffer->ref_count);
250    simple_mtx_unlock(&store->mtx);
251    *buffer_out = buffer;
252 
253    return VK_SUCCESS;
254 }
255 
/* Upload the SPM load USC shaders and build the per-shader PDS texture-state
 * and kick programs, recording the device addresses/sizes the driver needs to
 * launch them during a partial-render load.
 *
 * On failure nothing is left allocated; on success the suballocations are
 * owned by device->spm_load_state and freed by
 * pvr_device_finish_spm_load_state().
 */
VkResult pvr_device_init_spm_load_state(struct pvr_device *device)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   uint32_t pds_texture_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
   uint32_t pds_kick_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
   uint32_t usc_aligned_offsets[PVR_SPM_LOAD_PROGRAM_COUNT];
   uint32_t pds_allocation_size = 0;
   uint32_t usc_allocation_size = 0;
   struct pvr_suballoc_bo *pds_bo;
   struct pvr_suballoc_bo *usc_bo;
   uint8_t *mem_ptr;
   VkResult result;

   static_assert(PVR_SPM_LOAD_PROGRAM_COUNT == ARRAY_SIZE(spm_load_collection),
                 "Size mismatch");

   /* TODO: We don't need to upload all the programs since the set contains
    * programs for devices with 8 output regs as well. We can save some memory
    * by not uploading them on devices without the feature.
    * It's likely that once the compiler is hooked up we'll be using the shader
    * cache and generate the shaders as needed so this todo will be unnecessary.
    */

   /* Upload USC shaders. */

   /* First pass: lay out all shaders in a single suballocation, each 4-byte
    * aligned, recording per-shader offsets.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
      usc_aligned_offsets[i] = usc_allocation_size;
      usc_allocation_size += ALIGN_POT(spm_load_collection[i].size, 4);
   }

   result = pvr_bo_suballoc(&device->suballoc_usc,
                            usc_allocation_size,
                            4,
                            false,
                            &usc_bo);
   if (result != VK_SUCCESS)
      return result;

   mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(usc_bo);

   /* Second pass: copy each shader binary to its precomputed offset. */
   for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
      memcpy(mem_ptr + usc_aligned_offsets[i],
             spm_load_collection[i].code,
             spm_load_collection[i].size);
   }

   /* Upload PDS programs. */

   /* Sizing pass: compute per-shader code sizes and offsets for both the
    * texture-state program and the USC kick program so a single PDS
    * suballocation can hold them all.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
      struct pvr_pds_pixel_shader_sa_program pds_texture_program = {
         /* DMA for clear colors and tile buffer address parts. */
         .num_texture_dma_kicks = 1,
      };
      struct pvr_pds_kickusc_program pds_kick_program = { 0 };

      /* TODO: This looks a bit odd and isn't consistent with other code where
       * we're getting the size of the PDS program. Can we improve this?
       */
      pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&pds_texture_program);
      pvr_pds_set_sizes_pixel_shader_sa_texture_data(&pds_texture_program,
                                                     dev_info);

      /* TODO: Looking at the pvr_pds_generate_...() functions and the run-time
       * behavior the data size is always the same here. Should we try saving
       * some memory by adjusting things based on that?
       */
      device->spm_load_state.load_program[i].pds_texture_program_data_size =
         pds_texture_program.data_size;

      pds_texture_aligned_offsets[i] = pds_allocation_size;
      /* FIXME: Figure out the define for alignment of 16. */
      pds_allocation_size +=
         ALIGN_POT(PVR_DW_TO_BYTES(pds_texture_program.code_size), 16);

      pvr_pds_set_sizes_pixel_shader(&pds_kick_program);

      pds_kick_aligned_offsets[i] = pds_allocation_size;
      /* FIXME: Figure out the define for alignment of 16. */
      pds_allocation_size +=
         ALIGN_POT(PVR_DW_TO_BYTES(pds_kick_program.code_size +
                                   pds_kick_program.data_size),
                   16);
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_bo_suballoc(&device->suballoc_pds,
                            pds_allocation_size,
                            16,
                            false,
                            &pds_bo);
   if (result != VK_SUCCESS) {
      pvr_bo_suballoc_free(usc_bo);
      return result;
   }

   mem_ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(pds_bo);

   /* Generation pass: emit the PDS programs into the suballocation at the
    * offsets computed above and record the resulting device addresses.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(spm_load_collection); i++) {
      struct pvr_pds_pixel_shader_sa_program pds_texture_program = {
         /* DMA for clear colors and tile buffer address parts. */
         .num_texture_dma_kicks = 1,
      };
      const pvr_dev_addr_t usc_program_dev_addr =
         PVR_DEV_ADDR_OFFSET(usc_bo->dev_addr, usc_aligned_offsets[i]);
      struct pvr_pds_kickusc_program pds_kick_program = { 0 };
      enum PVRX(PDSINST_DOUTU_SAMPLE_RATE) sample_rate;

      pvr_pds_generate_pixel_shader_sa_code_segment(
         &pds_texture_program,
         (uint32_t *)(mem_ptr + pds_texture_aligned_offsets[i]));

      /* Multisampled load shaders run per-sample; others run per-instance. */
      if (spm_load_collection[i].info->msaa_sample_count > 1)
         sample_rate = PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL);
      else
         sample_rate = PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE);

      pvr_pds_setup_doutu(&pds_kick_program.usc_task_control,
                          usc_program_dev_addr.addr,
                          spm_load_collection[i].info->temps_required,
                          sample_rate,
                          false);

      /* Generated both code and data. */
      pvr_pds_generate_pixel_shader_program(
         &pds_kick_program,
         (uint32_t *)(mem_ptr + pds_kick_aligned_offsets[i]));

      device->spm_load_state.load_program[i].pds_pixel_program_offset =
         PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_kick_aligned_offsets[i]);
      device->spm_load_state.load_program[i].pds_uniform_program_offset =
         PVR_DEV_ADDR_OFFSET(pds_bo->dev_addr, pds_texture_aligned_offsets[i]);

      /* TODO: From looking at the pvr_pds_generate_...() functions, it seems
       * like temps_used is always 1. Should we remove this and hard code it
       * with a define in the PDS code?
       */
      device->spm_load_state.load_program[i].pds_texture_program_temps_count =
         pds_texture_program.temps_used;
   }

   device->spm_load_state.usc_programs = usc_bo;
   device->spm_load_state.pds_programs = pds_bo;

   return VK_SUCCESS;
}
401 
pvr_device_finish_spm_load_state(struct pvr_device * device)402 void pvr_device_finish_spm_load_state(struct pvr_device *device)
403 {
404    pvr_bo_suballoc_free(device->spm_load_state.pds_programs);
405    pvr_bo_suballoc_free(device->spm_load_state.usc_programs);
406 }
407 
PVRX(PBESTATE_PACKMODE)408 static inline enum PVRX(PBESTATE_PACKMODE)
409    pvr_spm_get_pbe_packmode(uint32_t dword_count)
410 {
411    switch (dword_count) {
412    case 1:
413       return PVRX(PBESTATE_PACKMODE_U32);
414    case 2:
415       return PVRX(PBESTATE_PACKMODE_U32U32);
416    case 3:
417       return PVRX(PBESTATE_PACKMODE_U32U32U32);
418    case 4:
419       return PVRX(PBESTATE_PACKMODE_U32U32U32U32);
420    default:
421       unreachable("Unsupported dword_count");
422    }
423 }
424 
425 /**
426  * \brief Sets up PBE registers and state values per a single render output.
427  *
428  * On a PR we want to store tile data to the scratch buffer so we need to
429  * setup the Pixel Back End (PBE) to write the data to the scratch buffer. This
430  * function sets up the PBE state and register values required to do so, for a
431  * single resource whether it be a tile buffer or the output register set.
432  *
433  * \return Size of the data saved into the scratch buffer in bytes.
434  */
static uint64_t pvr_spm_setup_pbe_state(
   const struct pvr_device_info *dev_info,
   const VkExtent2D *framebuffer_size,
   uint32_t dword_count,
   enum pvr_pbe_source_start_pos source_start,
   uint32_t sample_count,
   pvr_dev_addr_t scratch_buffer_addr,
   uint32_t pbe_state_words_out[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint64_t pbe_reg_words_out[static const ROGUE_NUM_PBESTATE_REG_WORDS])
{
   /* Line stride padded up to the PBE's stride granularity. */
   const uint32_t line_stride =
      ALIGN_POT(framebuffer_size->width,
                PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE));
   struct pvr_pbe_surf_params surf = { 0 };
   struct pvr_pbe_render_params render = { 0 };

   /* Raw U32 store with identity swizzle into a linear scratch surface. */
   surf.swizzle[0] = PIPE_SWIZZLE_X;
   surf.swizzle[1] = PIPE_SWIZZLE_Y;
   surf.swizzle[2] = PIPE_SWIZZLE_Z;
   surf.swizzle[3] = PIPE_SWIZZLE_W;
   surf.pbe_packmode = pvr_spm_get_pbe_packmode(dword_count);
   surf.source_format = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
   surf.addr = scratch_buffer_addr;
   surf.mem_layout = PVR_MEMLAYOUT_LINEAR;
   surf.stride = line_stride;

   /* Clip to the framebuffer; emit from `source_start` within the source. */
   render.max_x_clip = framebuffer_size->width - 1;
   render.max_y_clip = framebuffer_size->height - 1;
   render.source_start = source_start;

   pvr_pbe_pack_state(dev_info,
                      &surf,
                      &render,
                      pbe_state_words_out,
                      pbe_reg_words_out);

   /* Bytes this emit consumes in the scratch buffer. */
   return (uint64_t)line_stride * framebuffer_size->height * sample_count *
          PVR_DW_TO_BYTES(dword_count);
}
477 
pvr_set_pbe_all_valid_mask(struct usc_mrt_desc * desc)478 static inline void pvr_set_pbe_all_valid_mask(struct usc_mrt_desc *desc)
479 {
480    for (uint32_t i = 0; i < ARRAY_SIZE(desc->valid_mask); i++)
481       desc->valid_mask[i] = ~0;
482 }
483 
/* Advance the pvr_dev_addr_t lvalue `_addr` forward by `_offset` bytes,
 * in place.
 */
#define PVR_DEV_ADDR_ADVANCE(_addr, _offset) \
   _addr = PVR_DEV_ADDR_OFFSET(_addr, _offset)
486 
487 /**
488  * \brief Sets up PBE registers, PBE state values and MRT data per a single
489  * render output requiring 8 dwords to be written.
490  *
491  * On a PR we want to store tile data to the scratch buffer so we need to
492  * setup the Pixel Back End (PBE) to write the data to the scratch buffer, as
493  * well as setup the Multiple Render Target (MRT) info so the compiler knows
494  * what data needs to be stored (output regs or tile buffers) and generate the
495  * appropriate EOT shader.
496  *
497  * This function is only available for devices with the eight_output_registers
498  * feature thus requiring 8 dwords to be stored.
499  *
500  * \return Size of the data saved into the scratch buffer in bytes.
501  */
static uint64_t pvr_spm_setup_pbe_eight_dword_write(
   const struct pvr_device_info *dev_info,
   const VkExtent2D *framebuffer_size,
   uint32_t sample_count,
   enum usc_mrt_resource_type source_type,
   uint32_t tile_buffer_idx,
   pvr_dev_addr_t scratch_buffer_addr,
   uint32_t pbe_state_word_0_out[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint32_t pbe_state_word_1_out[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint64_t pbe_reg_word_0_out[static const ROGUE_NUM_PBESTATE_REG_WORDS],
   uint64_t pbe_reg_word_1_out[static const ROGUE_NUM_PBESTATE_REG_WORDS],
   uint32_t *render_target_used_out)
{
   /* A single PBE emit can store at most 4 dwords (U32U32U32U32). */
   const uint32_t max_pbe_write_size_dw = 4;
   uint64_t first_stored;
   uint64_t second_stored;

   assert(PVR_HAS_FEATURE(dev_info, eight_output_registers));
   assert(source_type != USC_MRT_RESOURCE_TYPE_INVALID);

   /* To store 8 dwords we need to split this into two
    * ROGUE_PBESTATE_PACKMODE_U32U32U32U32 stores with the second one using
    * PVR_PBE_STARTPOS_BIT128 as the source offset to store the last 4 dwords.
    */

   /* First emit: source dwords 0..3. */
   first_stored = pvr_spm_setup_pbe_state(dev_info,
                                          framebuffer_size,
                                          max_pbe_write_size_dw,
                                          PVR_PBE_STARTPOS_BIT0,
                                          sample_count,
                                          scratch_buffer_addr,
                                          pbe_state_word_0_out,
                                          pbe_reg_word_0_out);

   PVR_DEV_ADDR_ADVANCE(scratch_buffer_addr, first_stored);

   /* Second emit: source dwords 4..7, written right after the first block.
    *
    * FIX: the previous code advanced the (by-value, subsequently unused)
    * scratch_buffer_addr once more by the *cumulative* total after this
    * call — a dead and misleading statement, now removed.
    */
   second_stored = pvr_spm_setup_pbe_state(dev_info,
                                           framebuffer_size,
                                           max_pbe_write_size_dw,
                                           PVR_PBE_STARTPOS_BIT128,
                                           sample_count,
                                           scratch_buffer_addr,
                                           pbe_state_word_1_out,
                                           pbe_reg_word_1_out);

   /* Two PBE emits were set up for this source. */
   *render_target_used_out = 2;

   return first_stored + second_stored;
}
556 
557 /**
558  * \brief Create and upload the EOT PDS program.
559  *
560  * Essentially DOUTU the USC EOT shader.
561  */
562 /* TODO: See if we can dedup this with
563  * pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload().
564  */
pvr_pds_pixel_event_program_create_and_upload(struct pvr_device * device,const struct pvr_suballoc_bo * usc_eot_program,uint32_t usc_temp_count,struct pvr_pds_upload * const pds_upload_out)565 static VkResult pvr_pds_pixel_event_program_create_and_upload(
566    struct pvr_device *device,
567    const struct pvr_suballoc_bo *usc_eot_program,
568    uint32_t usc_temp_count,
569    struct pvr_pds_upload *const pds_upload_out)
570 {
571    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
572    struct pvr_pds_event_program program = { 0 };
573    uint32_t *staging_buffer;
574    VkResult result;
575 
576    pvr_pds_setup_doutu(&program.task_control,
577                        usc_eot_program->dev_addr.addr,
578                        usc_temp_count,
579                        PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
580                        false);
581 
582    staging_buffer =
583       vk_alloc(&device->vk.alloc,
584                PVR_DW_TO_BYTES(device->pixel_event_data_size_in_dwords),
585                8,
586                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
587    if (!staging_buffer)
588       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
589 
590    pvr_pds_generate_pixel_event_data_segment(&program,
591                                              staging_buffer,
592                                              dev_info);
593 
594    result = pvr_gpu_upload_pds(device,
595                                staging_buffer,
596                                device->pixel_event_data_size_in_dwords,
597                                4,
598                                NULL,
599                                0,
600                                0,
601                                4,
602                                pds_upload_out);
603    vk_free(&device->vk.alloc, staging_buffer);
604    return result;
605 }
606 
607 /**
608  * \brief Sets up the End of Tile (EOT) program for SPM.
609  *
610  * This sets up an EOT program to store the render pass'es on-chip and
611  * off-chip tile data to the SPM scratch buffer on the EOT event.
612  */
VkResult
pvr_spm_init_eot_state(struct pvr_device *device,
                       struct pvr_spm_eot_state *spm_eot_state,
                       const struct pvr_framebuffer *framebuffer,
                       const struct pvr_renderpass_hwsetup_render *hw_render,
                       uint32_t *emit_count_out)
{
   const VkExtent2D framebuffer_size = {
      .width = framebuffer->width,
      .height = framebuffer->height,
   };
   /* Per-emit PBE state words; consumed by the generated EOT USC shader. */
   uint32_t pbe_state_words[PVR_MAX_COLOR_ATTACHMENTS]
                           [ROGUE_NUM_PBESTATE_STATE_WORDS];
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   /* Running count of PBE emits set up; also indexes pbe_state_words and
    * spm_eot_state->pbe_reg_words.
    */
   uint32_t total_render_target_used = 0;
   struct pvr_pds_upload pds_eot_program;
   struct util_dynarray usc_shader_binary;
   uint32_t usc_temp_count;
   VkResult result;

   /* Next free position in the SPM scratch buffer; advanced after each
    * emit by the number of bytes that emit stores.
    */
   pvr_dev_addr_t next_scratch_buffer_addr =
      framebuffer->scratch_buffer->bo->vma->dev_addr;
   uint64_t mem_stored;

   /* TODO: See if instead of having a separate path for devices with 8 output
    * regs we can instead do this in a loop and dedup some stuff.
    */
   assert(util_is_power_of_two_or_zero(hw_render->output_regs_count) &&
          hw_render->output_regs_count <= 8);
   if (hw_render->output_regs_count == 8) {
      /* 8-dword path: each source needs two 4-dword PBE emits. */
      uint32_t render_targets_used;

      /* Store on-chip tile data (i.e. output regs). */

      mem_stored = pvr_spm_setup_pbe_eight_dword_write(
         dev_info,
         &framebuffer_size,
         hw_render->sample_count,
         USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
         0,
         next_scratch_buffer_addr,
         pbe_state_words[total_render_target_used],
         pbe_state_words[total_render_target_used + 1],
         spm_eot_state->pbe_reg_words[total_render_target_used],
         spm_eot_state->pbe_reg_words[total_render_target_used + 1],
         &render_targets_used);

      PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
      total_render_target_used += render_targets_used;

      /* Store off-chip tile data (i.e. tile buffers). */

      for (uint32_t i = 0; i < hw_render->tile_buffers_count; i++) {
         /* Tile-buffer sources are not implemented yet; this loop body is a
          * placeholder and currently asserts.
          */
         assert(!"Add support for tile buffers in EOT");
         pvr_finishme("Add support for tile buffers in EOT");

         /* `+ 1` since we have 2 emits per tile buffer. */
         assert(total_render_target_used + 1 < PVR_MAX_COLOR_ATTACHMENTS);

         mem_stored = pvr_spm_setup_pbe_eight_dword_write(
            dev_info,
            &framebuffer_size,
            hw_render->sample_count,
            USC_MRT_RESOURCE_TYPE_MEMORY,
            i,
            next_scratch_buffer_addr,
            pbe_state_words[total_render_target_used],
            pbe_state_words[total_render_target_used + 1],
            spm_eot_state->pbe_reg_words[total_render_target_used],
            spm_eot_state->pbe_reg_words[total_render_target_used + 1],
            &render_targets_used);

         PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);
         total_render_target_used += render_targets_used;
      }
   } else {
      /* <= 4 output regs: one PBE emit per source. */

      /* Store on-chip tile data (i.e. output regs). */

      mem_stored = pvr_spm_setup_pbe_state(
         dev_info,
         &framebuffer_size,
         hw_render->output_regs_count,
         PVR_PBE_STARTPOS_BIT0,
         hw_render->sample_count,
         next_scratch_buffer_addr,
         pbe_state_words[total_render_target_used],
         spm_eot_state->pbe_reg_words[total_render_target_used]);

      PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);

      total_render_target_used++;

      /* Store off-chip tile data (i.e. tile buffers). */

      for (uint32_t i = 0; i < hw_render->tile_buffers_count; i++) {
         /* Tile-buffer sources are not implemented yet; this loop body is a
          * placeholder and currently asserts.
          */
         assert(!"Add support for tile buffers in EOT");
         pvr_finishme("Add support for tile buffers in EOT");

         assert(total_render_target_used < PVR_MAX_COLOR_ATTACHMENTS);

         mem_stored = pvr_spm_setup_pbe_state(
            dev_info,
            &framebuffer_size,
            hw_render->output_regs_count,
            PVR_PBE_STARTPOS_BIT0,
            hw_render->sample_count,
            next_scratch_buffer_addr,
            pbe_state_words[total_render_target_used],
            spm_eot_state->pbe_reg_words[total_render_target_used]);

         PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_stored);

         total_render_target_used++;
      }
   }

   /* Generate the EOT USC shader from the accumulated PBE state words.
    * NOTE(review): usc_shader_binary appears to be initialized by
    * pvr_uscgen_eot() — confirm, as it is not initialized here.
    */
   pvr_uscgen_eot("SPM EOT",
                  total_render_target_used,
                  pbe_state_words[0],
                  &usc_temp_count,
                  &usc_shader_binary);

   /* TODO: Create a #define in the compiler code to replace the 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_shader_binary.data,
                               usc_shader_binary.size,
                               16,
                               &spm_eot_state->usc_eot_program);

   util_dynarray_fini(&usc_shader_binary);

   if (result != VK_SUCCESS)
      return result;

   result = pvr_pds_pixel_event_program_create_and_upload(
      device,
      spm_eot_state->usc_eot_program,
      usc_temp_count,
      &pds_eot_program);
   if (result != VK_SUCCESS) {
      pvr_bo_suballoc_free(spm_eot_state->usc_eot_program);
      return result;
   }

   spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
   spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;

   *emit_count_out = total_render_target_used;

   return VK_SUCCESS;
}
764 
/* Release the GPU allocations created by pvr_spm_init_eot_state().
 *
 * Frees the PDS pixel-event data segment and the EOT USC shader upload, in
 * reverse order of their creation in the init path. The device parameter is
 * unused here but kept for symmetry with pvr_spm_finish_bgobj_state().
 */
void pvr_spm_finish_eot_state(struct pvr_device *device,
                              struct pvr_spm_eot_state *spm_eot_state)
{
   pvr_bo_suballoc_free(spm_eot_state->pixel_event_program_data_upload);
   pvr_bo_suballoc_free(spm_eot_state->usc_eot_program);
}
771 
pvr_get_format_from_dword_count(uint32_t dword_count)772 static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count)
773 {
774    switch (dword_count) {
775    case 1:
776       return VK_FORMAT_R32_UINT;
777    case 2:
778       return VK_FORMAT_R32G32_UINT;
779    case 4:
780       return VK_FORMAT_R32G32B32A32_UINT;
781    default:
782       unreachable("Invalid dword_count");
783    }
784 }
785 
pvr_spm_setup_texture_state_words(struct pvr_device * device,uint32_t dword_count,const VkExtent2D framebuffer_size,uint32_t sample_count,pvr_dev_addr_t scratch_buffer_addr,uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],uint64_t * mem_used_out)786 static VkResult pvr_spm_setup_texture_state_words(
787    struct pvr_device *device,
788    uint32_t dword_count,
789    const VkExtent2D framebuffer_size,
790    uint32_t sample_count,
791    pvr_dev_addr_t scratch_buffer_addr,
792    uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],
793    uint64_t *mem_used_out)
794 {
795    /* We can ignore the framebuffer's layer count since we only support
796     * writing to layer 0.
797     */
798    struct pvr_texture_state_info info = {
799       .format = pvr_get_format_from_dword_count(dword_count),
800       .mem_layout = PVR_MEMLAYOUT_LINEAR,
801 
802       .type = VK_IMAGE_VIEW_TYPE_2D,
803       .tex_state_type = PVR_TEXTURE_STATE_STORAGE,
804       .extent = {
805          .width = framebuffer_size.width,
806          .height = framebuffer_size.height,
807       },
808 
809       .mip_levels = 1,
810 
811       .sample_count = sample_count,
812       .stride = framebuffer_size.width,
813 
814       .addr = scratch_buffer_addr,
815    };
816    const uint64_t aligned_fb_width =
817       ALIGN_POT(framebuffer_size.width,
818                 PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
819    const uint64_t fb_area = aligned_fb_width * framebuffer_size.height;
820    const uint8_t *format_swizzle;
821    VkResult result;
822 
823    format_swizzle = pvr_get_format_swizzle(info.format);
824    memcpy(info.swizzle, format_swizzle, sizeof(info.swizzle));
825 
826    result = pvr_pack_tex_state(device, &info, image_descriptor);
827    if (result != VK_SUCCESS)
828       return result;
829 
830    *mem_used_out = fb_area * PVR_DW_TO_BYTES(dword_count) * sample_count;
831 
832    return VK_SUCCESS;
833 }
834 
835 /* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? */
pvr_pds_bgnd_program_create_and_upload(struct pvr_device * device,uint32_t texture_program_data_size_in_dwords,const struct pvr_bo * consts_buffer,uint32_t const_shared_regs,struct pvr_pds_upload * pds_upload_out)836 static VkResult pvr_pds_bgnd_program_create_and_upload(
837    struct pvr_device *device,
838    uint32_t texture_program_data_size_in_dwords,
839    const struct pvr_bo *consts_buffer,
840    uint32_t const_shared_regs,
841    struct pvr_pds_upload *pds_upload_out)
842 {
843    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
844    struct pvr_pds_pixel_shader_sa_program texture_program = { 0 };
845    uint32_t staging_buffer_size;
846    uint32_t *staging_buffer;
847    VkResult result;
848 
849    pvr_csb_pack (&texture_program.texture_dma_address[0],
850                  PDSINST_DOUT_FIELDS_DOUTD_SRC0,
851                  doutd_src0) {
852       doutd_src0.sbase = consts_buffer->vma->dev_addr;
853    }
854 
855    pvr_csb_pack (&texture_program.texture_dma_control[0],
856                  PDSINST_DOUT_FIELDS_DOUTD_SRC1,
857                  doutd_src1) {
858       doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
859       doutd_src1.bsize = const_shared_regs;
860    }
861 
862    texture_program.num_texture_dma_kicks += 1;
863 
864 #if MESA_DEBUG
865    pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info);
866    assert(texture_program_data_size_in_dwords == texture_program.data_size);
867 #endif
868 
869    staging_buffer_size = PVR_DW_TO_BYTES(texture_program_data_size_in_dwords);
870 
871    staging_buffer = vk_alloc(&device->vk.alloc,
872                              staging_buffer_size,
873                              8,
874                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
875    if (!staging_buffer)
876       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
877 
878    pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program,
879                                                        staging_buffer,
880                                                        dev_info);
881 
882    /* FIXME: Figure out the define for alignment of 16. */
883    result = pvr_gpu_upload_pds(device,
884                                &staging_buffer[0],
885                                texture_program_data_size_in_dwords,
886                                16,
887                                NULL,
888                                0,
889                                0,
890                                16,
891                                pds_upload_out);
892    if (result != VK_SUCCESS) {
893       vk_free(&device->vk.alloc, staging_buffer);
894       return result;
895    }
896 
897    vk_free(&device->vk.alloc, staging_buffer);
898 
899    return VK_SUCCESS;
900 }
901 
/* Set up the per-framebuffer background-object state used to reload saved
 * SPM data: a CPU-mapped consts buffer filled with tile-buffer addresses, a
 * sampler descriptor and image descriptors pointing into the scratch buffer,
 * plus the uploaded PDS texture data and the three CR_PDS_BGRND register
 * words referencing the preuploaded SPM load programs.
 *
 * emit_count is the number of PBE emits set up by the EOT init path; one
 * image descriptor is written per emit. Returns VK_SUCCESS or propagates
 * allocation/upload failures (freeing the consts buffer on error).
 */
VkResult
pvr_spm_init_bgobj_state(struct pvr_device *device,
                         struct pvr_spm_bgobj_state *spm_bgobj_state,
                         const struct pvr_framebuffer *framebuffer,
                         const struct pvr_renderpass_hwsetup_render *hw_render,
                         uint32_t emit_count)
{
   /* Select the prebuilt SPM load shader variant matching this render's
    * sample count, tile buffer usage and output register count.
    */
   const uint32_t spm_load_program_idx =
      pvr_get_spm_load_program_index(hw_render->sample_count,
                                     hw_render->tile_buffers_count,
                                     hw_render->output_regs_count);
   const VkExtent2D framebuffer_size = {
      .width = framebuffer->width,
      .height = framebuffer->height,
   };
   pvr_dev_addr_t next_scratch_buffer_addr =
      framebuffer->scratch_buffer->bo->vma->dev_addr;
   struct pvr_spm_per_load_program_state *load_program_state;
   struct pvr_pds_upload pds_texture_data_upload;
   const struct pvr_shader_factory_info *info;
   union pvr_sampler_descriptor *descriptor;
   uint64_t consts_buffer_size;
   uint32_t dword_count;
   uint32_t *mem_ptr;
   VkResult result;

   assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
   info = spm_load_collection[spm_load_program_idx].info;

   consts_buffer_size = PVR_DW_TO_BYTES(info->const_shared_regs);

   /* TODO: Remove this check, along with the pvr_finishme(), once the zeroed
    * shaders are replaced by the real shaders.
    */
   if (!consts_buffer_size)
      return VK_SUCCESS;

   pvr_finishme("Remove consts buffer size check");

   /* CPU-mapped so the descriptors below can be written directly. */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         consts_buffer_size,
                         sizeof(uint32_t),
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &spm_bgobj_state->consts_buffer);
   if (result != VK_SUCCESS)
      return result;

   mem_ptr = spm_bgobj_state->consts_buffer->bo->map;

   /* Patch each tile buffer's device address into the consts buffer at the
    * dword locations the shader factory recorded. Entries come in pairs:
    * const_map[i] holds the high dword, const_map[i + 1] the low dword
    * (asserted adjacent below). The first PVR_SPM_LOAD_DEST_UNUSED entry
    * ends the list.
    */
   if (info->driver_const_location_map) {
      const uint32_t *const const_map = info->driver_const_location_map;

      for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
         pvr_dev_addr_t tile_buffer_addr;

         if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
#if MESA_DEBUG
            /* All remaining entries must be unused too. */
            for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
               assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
#endif
            break;
         }

         tile_buffer_addr =
            device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;

         assert(const_map[i] == const_map[i + 1] + 1);
         mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
         mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
      }
   }

   /* TODO: The 32 comes from how the shaders are compiled. We should
    * unhardcode it when this is hooked up to the compiler.
    */
   descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32);
   *descriptor = (union pvr_sampler_descriptor){ 0 };

   /* Point sampler with clamped, non-normalized coords: the load shader
    * reads back exact texels from the scratch image.
    */
   pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) {
      sampler.non_normalized_coords = true;
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
      sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
   }

   /* Even if we might have 8 output regs we can only pack and write 4 dwords
    * using R32G32B32A32_UINT.
    */
   if (hw_render->tile_buffers_count > 0)
      dword_count = 4;
   else
      dword_count = MIN2(hw_render->output_regs_count, 4);

   /* Write one image descriptor per PBE emit at the start of the consts
    * buffer, each viewing the next chunk of the scratch buffer.
    * NOTE(review): assumes the shader layout reserves descriptor space at
    * dword 0 distinct from the const_map destinations above — verify against
    * the shader factory layout.
    */
   for (uint32_t i = 0; i < emit_count; i++) {
      uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
      uint64_t mem_used = 0;

      STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) /
                       sizeof(uint32_t) ==
                    PVR_IMAGE_DESCRIPTOR_SIZE);
      mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS;

      result = pvr_spm_setup_texture_state_words(device,
                                                 dword_count,
                                                 framebuffer_size,
                                                 hw_render->sample_count,
                                                 next_scratch_buffer_addr,
                                                 mem_ptr_u64,
                                                 &mem_used);
      if (result != VK_SUCCESS)
         goto err_free_consts_buffer;

      PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
   }

   assert(spm_load_program_idx <
          ARRAY_SIZE(device->spm_load_state.load_program));
   load_program_state =
      &device->spm_load_state.load_program[spm_load_program_idx];

   result = pvr_pds_bgnd_program_create_and_upload(
      device,
      load_program_state->pds_texture_program_data_size,
      spm_bgobj_state->consts_buffer,
      info->const_shared_regs,
      &pds_texture_data_upload);
   if (result != VK_SUCCESS)
      goto err_free_consts_buffer;

   spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo;

   /* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? */

   /* pds_reg_values[0]: pixel and texunicode PDS program addresses. */
   /* clang-format off */
   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0],
                 CR_PDS_BGRND0_BASE,
                 value) {
      /* clang-format on */
      value.shader_addr = load_program_state->pds_pixel_program_offset;
      value.texunicode_addr = load_program_state->pds_uniform_program_offset;
   }

   /* pds_reg_values[1]: heap offset of the texture data just uploaded. */
   /* clang-format off */
   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1],
                 CR_PDS_BGRND1_BASE,
                 value) {
      /* clang-format on */
      value.texturedata_addr =
         PVR_DEV_ADDR(pds_texture_data_upload.data_offset);
   }

   /* pds_reg_values[2] holds the BGRND3_SIZEINFO word: shared-register,
    * texture-state and temp sizes in hardware units.
    */
   /* clang-format off */
   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2],
                 CR_PDS_BGRND3_SIZEINFO,
                 value) {
      /* clang-format on */
      value.usc_sharedsize =
         DIV_ROUND_UP(info->const_shared_regs,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
      value.pds_texturestatesize = DIV_ROUND_UP(
         pds_texture_data_upload.data_size,
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
      value.pds_tempsize =
         DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }

   return VK_SUCCESS;

err_free_consts_buffer:
   pvr_bo_free(device, spm_bgobj_state->consts_buffer);

   return result;
}
1081 
/* Release the GPU allocations created by pvr_spm_init_bgobj_state():
 * the PDS texture data upload and the CPU-mapped consts buffer, in reverse
 * order of their creation in the init path.
 */
void pvr_spm_finish_bgobj_state(struct pvr_device *device,
                                struct pvr_spm_bgobj_state *spm_bgobj_state)
{
   pvr_bo_suballoc_free(spm_bgobj_state->pds_texture_data_upload);
   pvr_bo_free(device, spm_bgobj_state->consts_buffer);
}
1088 
1089 #undef PVR_DEV_ADDR_ADVANCE
1090