1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <vulkan/vulkan.h>
30
31 #include "hwdef/rogue_hw_utils.h"
32 #include "pvr_bo.h"
33 #include "pvr_cdm_load_sr.h"
34 #include "pvr_common.h"
35 #include "pvr_csb.h"
36 #include "pvr_job_context.h"
37 #include "pvr_pds.h"
38 #include "pvr_private.h"
39 #include "pvr_transfer_frag_store.h"
40 #include "pvr_types.h"
41 #include "pvr_uscgen.h"
42 #include "pvr_vdm_load_sr.h"
43 #include "pvr_vdm_store_sr.h"
44 #include "pvr_winsys.h"
45 #include "util/macros.h"
46 #include "util/os_file.h"
47 #include "util/u_dynarray.h"
48 #include "vk_alloc.h"
49 #include "vk_log.h"
50
51 /* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
52 * value when constructing the control stream?
53 */
54 /* The VDM callstack is used by the hardware to implement control stream links
55 * with a return, i.e. sub-control streams/subroutines. This value specifies the
56 * maximum callstack depth.
57 */
58 #define PVR_VDM_CALLSTACK_MAX_DEPTH 1U
59
60 #define ROGUE_PDS_TASK_PROGRAM_SIZE 256U
61
pvr_ctx_reset_cmd_init(struct pvr_device * device,struct pvr_reset_cmd * const reset_cmd)62 static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
63 struct pvr_reset_cmd *const reset_cmd)
64 {
65 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
66
67 /* The reset framework depends on compute support in the hw. */
68 assert(PVR_HAS_FEATURE(dev_info, compute));
69
70 if (PVR_HAS_QUIRK(dev_info, 51764))
71 pvr_finishme("Missing reset support for brn51764");
72
73 if (PVR_HAS_QUIRK(dev_info, 58839))
74 pvr_finishme("Missing reset support for brn58839");
75
76 return VK_SUCCESS;
77 }
78
/* Counterpart to pvr_ctx_reset_cmd_init(). Releases any reset-command state.
 * Init currently allocates nothing, so this is a no-op placeholder kept so
 * callers have a symmetric init/fini pair.
 */
static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
                                   struct pvr_reset_cmd *reset_cmd)

{
   /* TODO: reset command cleanup. */
}
85
pvr_pds_pt_store_program_create_and_upload(struct pvr_device * device,struct pvr_bo * pt_bo,uint32_t pt_bo_size,struct pvr_pds_upload * const pds_upload_out)86 static VkResult pvr_pds_pt_store_program_create_and_upload(
87 struct pvr_device *device,
88 struct pvr_bo *pt_bo,
89 uint32_t pt_bo_size,
90 struct pvr_pds_upload *const pds_upload_out)
91 {
92 struct pvr_pds_stream_out_terminate_program program = { 0 };
93 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
94 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
95 size_t staging_buffer_size;
96 uint32_t *staging_buffer;
97 uint32_t *data_buffer;
98 uint32_t *code_buffer;
99 VkResult result;
100
101 /* Check the bo size can be converted to dwords without any rounding. */
102 assert(pt_bo_size % 4 == 0);
103
104 program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
105 program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;
106
107 pvr_pds_generate_stream_out_terminate_program(&program,
108 NULL,
109 PDS_GENERATE_SIZES,
110 dev_info);
111
112 staging_buffer_size = (program.stream_out_terminate_pds_data_size +
113 program.stream_out_terminate_pds_code_size) *
114 sizeof(*staging_buffer);
115
116 staging_buffer = vk_zalloc(&device->vk.alloc,
117 staging_buffer_size,
118 8,
119 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
120 if (!staging_buffer)
121 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
122
123 data_buffer = staging_buffer;
124 code_buffer =
125 pvr_pds_generate_stream_out_terminate_program(&program,
126 data_buffer,
127 PDS_GENERATE_DATA_SEGMENT,
128 dev_info);
129 pvr_pds_generate_stream_out_terminate_program(&program,
130 code_buffer,
131 PDS_GENERATE_CODE_SEGMENT,
132 dev_info);
133
134 /* This PDS program is passed to the HW via the PPP state words. These only
135 * allow the data segment address to be specified and expect the code
136 * segment to immediately follow. Assume the code alignment is the same as
137 * the data.
138 */
139 result =
140 pvr_gpu_upload_pds(device,
141 data_buffer,
142 program.stream_out_terminate_pds_data_size,
143 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
144 code_buffer,
145 program.stream_out_terminate_pds_code_size,
146 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
147 cache_line_size,
148 pds_upload_out);
149
150 vk_free(&device->vk.alloc, staging_buffer);
151
152 return result;
153 }
154
pvr_pds_pt_resume_program_create_and_upload(struct pvr_device * device,struct pvr_bo * pt_bo,uint32_t pt_bo_size,struct pvr_pds_upload * const pds_upload_out)155 static VkResult pvr_pds_pt_resume_program_create_and_upload(
156 struct pvr_device *device,
157 struct pvr_bo *pt_bo,
158 uint32_t pt_bo_size,
159 struct pvr_pds_upload *const pds_upload_out)
160 {
161 struct pvr_pds_stream_out_init_program program = { 0 };
162 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
163 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
164 size_t staging_buffer_size;
165 uint32_t *staging_buffer;
166 uint32_t *data_buffer;
167 uint32_t *code_buffer;
168 VkResult result;
169
170 /* Check the bo size can be converted to dwords without any rounding. */
171 assert(pt_bo_size % 4 == 0);
172
173 program.num_buffers = 1;
174 program.pds_buffer_data_size[0] = pt_bo_size / 4;
175 program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;
176
177 pvr_pds_generate_stream_out_init_program(&program,
178 NULL,
179 false,
180 PDS_GENERATE_SIZES,
181 dev_info);
182
183 staging_buffer_size = (program.stream_out_init_pds_data_size +
184 program.stream_out_init_pds_code_size) *
185 sizeof(*staging_buffer);
186
187 staging_buffer = vk_zalloc(&device->vk.alloc,
188 staging_buffer_size,
189 8,
190 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
191 if (!staging_buffer)
192 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
193
194 data_buffer = staging_buffer;
195 code_buffer =
196 pvr_pds_generate_stream_out_init_program(&program,
197 data_buffer,
198 false,
199 PDS_GENERATE_DATA_SEGMENT,
200 dev_info);
201 pvr_pds_generate_stream_out_init_program(&program,
202 code_buffer,
203 false,
204 PDS_GENERATE_CODE_SEGMENT,
205 dev_info);
206
207 /* This PDS program is passed to the HW via the PPP state words. These only
208 * allow the data segment address to be specified and expect the code
209 * segment to immediately follow. Assume the code alignment is the same as
210 * the data.
211 */
212 result =
213 pvr_gpu_upload_pds(device,
214 data_buffer,
215 program.stream_out_init_pds_data_size,
216 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
217 code_buffer,
218 program.stream_out_init_pds_code_size,
219 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
220 cache_line_size,
221 pds_upload_out);
222
223 vk_free(&device->vk.alloc, staging_buffer);
224
225 return result;
226 }
227
228 static VkResult
pvr_render_job_pt_programs_setup(struct pvr_device * device,struct rogue_pt_programs * pt_programs)229 pvr_render_job_pt_programs_setup(struct pvr_device *device,
230 struct rogue_pt_programs *pt_programs)
231 {
232 VkResult result;
233
234 result = pvr_bo_alloc(device,
235 device->heaps.pds_heap,
236 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
237 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
238 PVR_BO_ALLOC_FLAG_CPU_ACCESS,
239 &pt_programs->store_resume_state_bo);
240 if (result != VK_SUCCESS)
241 return result;
242
243 result = pvr_pds_pt_store_program_create_and_upload(
244 device,
245 pt_programs->store_resume_state_bo,
246 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
247 &pt_programs->pds_store_program);
248 if (result != VK_SUCCESS)
249 goto err_free_store_resume_state_bo;
250
251 result = pvr_pds_pt_resume_program_create_and_upload(
252 device,
253 pt_programs->store_resume_state_bo,
254 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
255 &pt_programs->pds_resume_program);
256 if (result != VK_SUCCESS)
257 goto err_free_pds_store_program;
258
259 return VK_SUCCESS;
260
261 err_free_pds_store_program:
262 pvr_bo_suballoc_free(pt_programs->pds_store_program.pvr_bo);
263
264 err_free_store_resume_state_bo:
265 pvr_bo_free(device, pt_programs->store_resume_state_bo);
266
267 return result;
268 }
269
/* Free the persistent-temp context-switch resources created by
 * pvr_render_job_pt_programs_setup(), in reverse order of allocation.
 */
static void
pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
                                   struct rogue_pt_programs *pt_programs)
{
   pvr_bo_suballoc_free(pt_programs->pds_resume_program.pvr_bo);
   pvr_bo_suballoc_free(pt_programs->pds_store_program.pvr_bo);
   pvr_bo_free(device, pt_programs->store_resume_state_bo);
}
278
/* Initialize the shared-register (SR) PDS program descriptor.
 *
 * Two 64-bit DOUTW constants carry the SR buffer address and the address of
 * the reserved region that follows it; a DOUTU kicks the USC program at
 * usc_program_upload_offset using usc_temps temporaries.
 */
static void pvr_pds_ctx_sr_program_setup(
   bool cc_enable,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_shared_storing_program *const program_out)
{
   /* The PDS task is the same for stores and loads. */
   struct pvr_pds_shared_storing_program program = { 0 };

   program.cc_enable = cc_enable;
   program.doutw_control.dest_store = PDS_UNIFIED_STORE;
   program.doutw_control.num_const64 = 2;
   program.doutw_control.doutw_data[0] = sr_addr.addr;
   program.doutw_control.doutw_data[1] =
      sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
   program.doutw_control.last_instruction = false;

   pvr_pds_setup_doutu(&program.usc_task.usc_task_control,
                       usc_program_upload_offset,
                       usc_temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   *program_out = program;
}
306
307 /* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
308 * this. If there is a problem here it's likely that the same problem exists
309 * there so don't forget to update the compute function.
310 */
pvr_pds_render_ctx_sr_program_create_and_upload(struct pvr_device * device,uint64_t usc_program_upload_offset,uint8_t usc_temps,pvr_dev_addr_t sr_addr,struct pvr_pds_upload * const pds_upload_out)311 static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
312 struct pvr_device *device,
313 uint64_t usc_program_upload_offset,
314 uint8_t usc_temps,
315 pvr_dev_addr_t sr_addr,
316 struct pvr_pds_upload *const pds_upload_out)
317 {
318 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
319 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
320 const uint32_t pds_data_alignment =
321 PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
322
323 /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
324 * and code size when using the PDS_GENERATE_SIZES mode.
325 */
326 STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
327 uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
328 struct pvr_pds_shared_storing_program program;
329 ASSERTED uint32_t *buffer_end;
330 uint32_t code_offset;
331
332 pvr_pds_ctx_sr_program_setup(false,
333 usc_program_upload_offset,
334 usc_temps,
335 sr_addr,
336 &program);
337
338 pvr_pds_generate_shared_storing_program(&program,
339 &staging_buffer[0],
340 PDS_GENERATE_DATA_SEGMENT,
341 dev_info);
342
343 code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
344
345 buffer_end =
346 pvr_pds_generate_shared_storing_program(&program,
347 &staging_buffer[code_offset],
348 PDS_GENERATE_CODE_SEGMENT,
349 dev_info);
350
351 assert((uint32_t)(buffer_end - staging_buffer) * sizeof(staging_buffer[0]) <
352 ROGUE_PDS_TASK_PROGRAM_SIZE);
353
354 return pvr_gpu_upload_pds(device,
355 &staging_buffer[0],
356 program.data_size,
357 PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
358 &staging_buffer[code_offset],
359 program.code_size,
360 PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
361 cache_line_size,
362 pds_upload_out);
363 }
364
365 /* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
366 * this. If there is a problem here it's likely that the same problem exists
367 * there so don't forget to update the render_ctx function.
368 */
/* Create and upload the PDS program used to store or load the shared
 * registers when context switching a compute job.
 *
 * is_loading_program selects the load flavor. On devices that need the SW
 * compute PDS barrier the load path uses the dedicated "shared loading"
 * generator; in all cases the code segment is prefixed with a conditional
 * barrier stub. On success the upload is freed via
 * pvr_bo_suballoc_free(pds_upload_out->pvr_bo).
 */
static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
   struct pvr_device *device,
   bool is_loading_program,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   /* Code segment alignment, expressed in dwords. */
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;

   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
    * and code size when using the PDS_GENERATE_SIZES mode.
    */
   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
   struct pvr_pds_shared_storing_program program;
   uint32_t *buffer_ptr;
   uint32_t code_offset;

   /* Conditional execution is only enabled when ERN 35421 is present. */
   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
                                usc_program_upload_offset,
                                usc_temps,
                                sr_addr,
                                &program);

   /* Data segment: the SW-barrier load flavor has its own generator. */
   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      pvr_pds_generate_compute_shared_loading_program(&program,
                                                      &staging_buffer[0],
                                                      PDS_GENERATE_DATA_SEGMENT,
                                                      dev_info);
   } else {
      pvr_pds_generate_shared_storing_program(&program,
                                              &staging_buffer[0],
                                              PDS_GENERATE_DATA_SEGMENT,
                                              dev_info);
   }

   /* Code segment starts at the next data-size-unit boundary. */
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* Prefix the code segment with the conditional barrier stub; the
    * generator returns where the main code should continue.
    */
   buffer_ptr =
      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
                                                   PDS_GENERATE_CODE_SEGMENT);

   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
         &program,
         buffer_ptr,
         PDS_GENERATE_CODE_SEGMENT,
         dev_info);
   } else {
      buffer_ptr =
         pvr_pds_generate_shared_storing_program(&program,
                                                 buffer_ptr,
                                                 PDS_GENERATE_CODE_SEGMENT,
                                                 dev_info);
   }

   /* The generated program must have fit in the staging buffer. */
   assert((uint32_t)(buffer_ptr - staging_buffer) * sizeof(staging_buffer[0]) <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   /* One set of alignment values is used for both the store and load
    * variants; assert the hw defines agree.
    */
   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));

   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));

   /* Code size is measured from the actual end pointer — presumably because
    * program.code_size does not cover the barrier prefix; confirm against
    * the PDS generator.
    */
   return pvr_gpu_upload_pds(
      device,
      &staging_buffer[0],
      program.data_size,
      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
      &staging_buffer[code_offset],
      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
      cache_line_size,
      pds_upload_out);
}
449
/* Selects which context type pvr_ctx_sr_programs_setup() builds shared
 * register store/load programs for.
 */
enum pvr_ctx_sr_program_target {
   PVR_CTX_SR_RENDER_TARGET,
   PVR_CTX_SR_COMPUTE_TARGET,
};
454
/* Create and upload the shared-register (SR) context-switch programs for a
 * render or compute context.
 *
 * Uploads the precompiled USC store/load binaries, allocates the state
 * buffer they spill to / reload from, then builds PDS programs pointing at
 * them (render and compute use different PDS program layouts). On failure
 * everything allocated so far is unwound; on success resources are released
 * with pvr_ctx_sr_programs_cleanup().
 */
static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
                                          enum pvr_ctx_sr_program_target target,
                                          struct rogue_sr_programs *sr_programs)
{
   /* Main SR buffer plus the reserved region addressed by the second DOUTW
    * constant (see pvr_pds_ctx_sr_program_setup()).
    */
   const uint64_t store_load_state_bo_size =
      PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
      ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   uint64_t usc_store_program_upload_offset;
   uint64_t usc_load_program_upload_offset;
   const uint8_t *usc_load_sr_code;
   uint32_t usc_load_sr_code_size;
   VkResult result;

   /* Note that this is being used for both compute and render ctx. There is no
    * compute equivalent define for the VDMCTRL unit size.
    */
   /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
   sr_programs->usc.unified_size =
      DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));

   /* NOTE(review): other pvr_bo_alloc() calls in this file pass
    * PVR_BO_ALLOC_FLAG_* values; confirm PVR_WINSYS_BO_FLAG_CPU_ACCESS is
    * the intended flag namespace here.
    */
   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         store_load_state_bo_size,
                         cache_line_size,
                         PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                         &sr_programs->store_load_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* USC state update: SR state store. */

   assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

   result = pvr_gpu_upload_usc(device,
                               pvr_vdm_store_sr_code,
                               sizeof(pvr_vdm_store_sr_code),
                               cache_line_size,
                               &sr_programs->usc.store_program_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_load_state_bo;

   /* PDS programs reference USC code as offsets from the USC heap base. */
   usc_store_program_upload_offset =
      sr_programs->usc.store_program_bo->dev_addr.addr -
      device->heaps.usc_heap->base_addr.addr;

   /* USC state update: SR state load. */

   /* BRN 62269 on compute requires the CDM-specific load program. */
   if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
      STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

      usc_load_sr_code = pvr_cdm_load_sr_code;
      usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
   } else {
      STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);

      usc_load_sr_code = pvr_vdm_load_sr_code;
      usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
   }

   result = pvr_gpu_upload_usc(device,
                               usc_load_sr_code,
                               usc_load_sr_code_size,
                               cache_line_size,
                               &sr_programs->usc.load_program_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_store_program_bo;

   usc_load_program_upload_offset =
      sr_programs->usc.load_program_bo->dev_addr.addr -
      device->heaps.usc_heap->base_addr.addr;

   /* FIXME: The number of USC temps should be output alongside
    * pvr_vdm_store_sr_code rather than hard coded.
    */
   /* Create and upload the PDS load and store programs. Point them to the
    * appropriate USC load and store programs.
    */
   switch (target) {
   case PVR_CTX_SR_RENDER_TARGET:
      /* PDS state update: SR state store. */
      result = pvr_pds_render_ctx_sr_program_create_and_upload(
         device,
         usc_store_program_upload_offset,
         8, /* usc_temps — hard coded, see FIXME above. */
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.store_program);
      if (result != VK_SUCCESS)
         goto err_free_usc_load_program_bo;

      /* PDS state update: SR state load. */
      result = pvr_pds_render_ctx_sr_program_create_and_upload(
         device,
         usc_load_program_upload_offset,
         20, /* usc_temps — hard coded, see FIXME above. */
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.load_program);
      if (result != VK_SUCCESS)
         goto err_free_pds_store_program_bo;

      break;

   case PVR_CTX_SR_COMPUTE_TARGET:
      /* PDS state update: SR state store. */
      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
         device,
         false,
         usc_store_program_upload_offset,
         8, /* usc_temps — hard coded, see FIXME above. */
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.store_program);
      if (result != VK_SUCCESS)
         goto err_free_usc_load_program_bo;

      /* PDS state update: SR state load. */
      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
         device,
         true,
         usc_load_program_upload_offset,
         20, /* usc_temps — hard coded, see FIXME above. */
         sr_programs->store_load_state_bo->vma->dev_addr,
         &sr_programs->pds.load_program);
      if (result != VK_SUCCESS)
         goto err_free_pds_store_program_bo;

      break;

   default:
      unreachable("Invalid target.");
      break;
   }

   return VK_SUCCESS;

err_free_pds_store_program_bo:
   pvr_bo_suballoc_free(sr_programs->pds.store_program.pvr_bo);

err_free_usc_load_program_bo:
   pvr_bo_suballoc_free(sr_programs->usc.load_program_bo);

err_free_usc_store_program_bo:
   pvr_bo_suballoc_free(sr_programs->usc.store_program_bo);

err_free_store_load_state_bo:
   pvr_bo_free(device, sr_programs->store_load_state_bo);

   return result;
}
604
/* Release everything created by pvr_ctx_sr_programs_setup(), in reverse
 * order of allocation.
 */
static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
                                        struct rogue_sr_programs *sr_programs)
{
   pvr_bo_suballoc_free(sr_programs->pds.load_program.pvr_bo);
   pvr_bo_suballoc_free(sr_programs->pds.store_program.pvr_bo);
   pvr_bo_suballoc_free(sr_programs->usc.load_program_bo);
   pvr_bo_suballoc_free(sr_programs->usc.store_program_bo);
   pvr_bo_free(device, sr_programs->store_load_state_bo);
}
614
615 static VkResult
pvr_render_ctx_switch_programs_setup(struct pvr_device * device,struct pvr_render_ctx_programs * programs)616 pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
617 struct pvr_render_ctx_programs *programs)
618 {
619 VkResult result;
620
621 result = pvr_render_job_pt_programs_setup(device, &programs->pt);
622 if (result != VK_SUCCESS)
623 return result;
624
625 result = pvr_ctx_sr_programs_setup(device,
626 PVR_CTX_SR_RENDER_TARGET,
627 &programs->sr);
628 if (result != VK_SUCCESS)
629 goto err_pt_programs_cleanup;
630
631 return VK_SUCCESS;
632
633 err_pt_programs_cleanup:
634 pvr_render_job_pt_programs_cleanup(device, &programs->pt);
635
636 return result;
637 }
638
/* Undo pvr_render_ctx_switch_programs_setup() in reverse order. */
static void
pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
                                       struct pvr_render_ctx_programs *programs)
{
   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
}
646
pvr_render_ctx_switch_init(struct pvr_device * device,struct pvr_render_ctx * ctx)647 static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
648 struct pvr_render_ctx *ctx)
649 {
650 struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
651 const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
652 PVR_BO_ALLOC_FLAG_CPU_ACCESS;
653 const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
654 PVR_BO_ALLOC_FLAG_CPU_ACCESS;
655 VkResult result;
656 uint32_t i;
657
658 result = pvr_bo_alloc(device,
659 device->heaps.general_heap,
660 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
661 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
662 vdm_state_bo_flags,
663 &ctx_switch->vdm_state_bo);
664 if (result != VK_SUCCESS)
665 return result;
666
667 result = pvr_bo_alloc(device,
668 device->heaps.general_heap,
669 ROGUE_LLS_TA_STATE_BUFFER_SIZE,
670 ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
671 geom_state_bo_flags,
672 &ctx_switch->geom_state_bo);
673 if (result != VK_SUCCESS)
674 goto err_pvr_bo_free_vdm_state_bo;
675
676 for (i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
677 result =
678 pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
679 if (result != VK_SUCCESS)
680 goto err_programs_cleanup;
681 }
682
683 return VK_SUCCESS;
684
685 err_programs_cleanup:
686 for (uint32_t j = 0; j < i; j++)
687 pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[j]);
688
689 pvr_bo_free(device, ctx_switch->geom_state_bo);
690
691 err_pvr_bo_free_vdm_state_bo:
692 pvr_bo_free(device, ctx_switch->vdm_state_bo);
693
694 return result;
695 }
696
/* Counterpart to pvr_render_ctx_switch_init(): frees every per-context
 * program set, then the geometry and VDM state buffers.
 */
static void pvr_render_ctx_switch_fini(struct pvr_device *device,
                                       struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++)
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);

   pvr_bo_free(device, ctx_switch->geom_state_bo);
   pvr_bo_free(device, ctx_switch->vdm_state_bo);
}
708
/* Pack the VDMCTRL_PDS_STATE0/1 words describing pds_program for use in the
 * VDM context store/resume tasks.
 *
 * STATE0 carries the task sizing (USC target, unified store size, PDS data
 * size in hw units); STATE1 carries the PDS data segment address and the
 * state-descriptor types.
 */
static void
pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
                                      uint8_t usc_unified_size,
                                      uint32_t *const state0_out,
                                      uint32_t *const state1_out)
{
   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = PVR_DW_TO_BYTES(pds_program->data_size);

      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
      state.usc_target = usc_target;
      state.usc_common_size = 0;
      state.usc_unified_size = usc_unified_size;
      state.pds_temp_size = 0;

      /* The hw field is in PDS_DATA_SIZE_UNIT_SIZE units; the size must be a
       * whole number of them.
       */
      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
   };

   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
   }
}
738
/* Pack the TA_STATE_STREAM_OUT1/2 words describing the persistent-temp
 * store/resume PDS program for the geometry context-switch tasks.
 */
static void
pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
                                          uint32_t *const stream_out1_out,
                                          uint32_t *const stream_out2_out)
{
   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = PVR_DW_TO_BYTES(pds_program->data_size);

      state.sync = true;

      /* The hw field is in PDS_DATA_SIZE_UNIT_SIZE units; the size must be a
       * whole number of them.
       */
      assert(pds_data_size %
                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);

      state.pds_temp_size = 0;
   }

   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
   }
}
763
/* Populate the static (set-at-create-time) register state the winsys needs
 * for a render context: the VDM/TA context state base addresses and, for
 * each context-switch program set, the store and resume task words.
 *
 * q_dst and d_dst are scratch destinations for 64-bit and 32-bit register
 * words respectively.
 */
static void pvr_render_ctx_ws_static_state_init(
   struct pvr_render_ctx *ctx,
   struct pvr_winsys_render_ctx_static_state *static_state)
{
   uint64_t *q_dst;
   uint32_t *d_dst;

   q_dst = &static_state->vdm_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
   }

   q_dst = &static_state->geom_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;

      /* Context store state. */

      /* TASK0/1: the SR store PDS program (USC target ANY). */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
                                               PVRX(VDMCTRL_USC_TARGET_ANY),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_store_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset);
         }
      }

      /* TASK2: the persistent-temp store stream-out program. */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }

      /* Context resume state. */

      /* TASK0/1: the SR load PDS program (USC target ALL). */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
                                               PVRX(VDMCTRL_USC_TARGET_ALL),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset);
         }
      }

      /* TASK2: the persistent-temp resume stream-out program. */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }
   }
}
836
/* Fill in the winsys render-context create info: scheduling priority, the
 * VDM callstack base address, and the static context-switch register state.
 *
 * ctx must be fully initialized before calling this (see the note in
 * pvr_render_ctx_create()).
 */
static void pvr_render_ctx_ws_create_info_init(
   struct pvr_render_ctx *ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_render_ctx_create_info *create_info)
{
   create_info->priority = priority;
   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;

   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
}
847
/* Create a render hardware context.
 *
 * Allocates the VDM callstack buffer, sets up the context-switch programs
 * and reset machinery, then asks the winsys to create the kernel-side
 * context. On success *ctx_out owns all of this and must be destroyed with
 * pvr_render_ctx_destroy(); on failure everything is unwound here.
 */
VkResult pvr_render_ctx_create(struct pvr_device *device,
                               enum pvr_winsys_ctx_priority priority,
                               struct pvr_render_ctx **const ctx_out)
{
   /* One 64-bit entry per supported callstack level. */
   const uint64_t vdm_callstack_size =
      sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
   struct pvr_winsys_render_ctx_create_info create_info;
   struct pvr_render_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         vdm_callstack_size,
                         PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
                         0,
                         &ctx->vdm_callstack_bo);
   if (result != VK_SUCCESS)
      goto err_vk_free_ctx;

   result = pvr_render_ctx_switch_init(device, ctx);
   if (result != VK_SUCCESS)
      goto err_free_vdm_callstack_bo;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_render_ctx_switch_fini;

   /* ctx must be fully initialized by this point since
    * pvr_render_ctx_ws_create_info_init() depends on this.
    */
   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = device->ws->ops->render_ctx_create(device->ws,
                                               &create_info,
                                               &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_render_ctx_reset_cmd_fini;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_render_ctx_reset_cmd_fini:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_render_ctx_switch_fini:
   pvr_render_ctx_switch_fini(device, ctx);

err_free_vdm_callstack_bo:
   pvr_bo_free(device, ctx->vdm_callstack_bo);

err_vk_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
913
/* Destroy a render context, tearing down resources in the reverse order
 * of pvr_render_ctx_create(). The winsys (kernel-side) context is
 * destroyed first so no further GPU use of the buffers can occur.
 */
void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   pvr_render_ctx_switch_fini(device, ctx);
   pvr_bo_free(device, ctx->vdm_callstack_bo);
   vk_free(&device->vk.alloc, ctx);
}
925
/* Generate and upload the PDS fence/terminate program used when a compute
 * context is torn down mid-flight.
 *
 * The program is generated in two passes into a single staging buffer:
 * first the data segment at offset 0, then the code segment at the next
 * data-size-unit-aligned offset. The resulting upload is returned in
 * pds_upload_out.
 */
static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_upload *const pds_upload_out)
{
   /* Alignment of the code segment within the staging buffer, in dwords. */
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   /* Staging buffer holds both segments; sized in dwords (bytes >> 2). */
   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
   struct pvr_pds_fence_program program = { 0 };
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;
   uint32_t data_size;

   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
            dev_runtime_info->num_phantoms >= 2));

   /* Pass 1: emit the data segment and learn its size. */
   pvr_pds_generate_fence_terminate_program(&program,
                                            staging_buffer,
                                            PDS_GENERATE_DATA_SEGMENT,
                                            &device->pdevice->dev_info);

   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
    * when we generate the code segment. Implement
    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
    * This behavior doesn't seem consistent with the rest of the api. For now
    * we store the size in a variable.
    */
   data_size = program.data_size;
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* Pass 2: emit the code segment after the (aligned) data segment. */
   buffer_end =
      pvr_pds_generate_fence_terminate_program(&program,
                                               &staging_buffer[code_offset],
                                               PDS_GENERATE_CODE_SEGMENT,
                                               &device->pdevice->dev_info);

   /* Both segments together must fit in the staging buffer. */
   assert((uint64_t)(buffer_end - staging_buffer) * sizeof(staging_buffer[0]) <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             staging_buffer,
                             data_size,
                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
                             0,
                             pds_upload_out);
}
978
/* Pack the CDM static state registers for a compute context.
 *
 * Three groups of registers are filled in from the context-switch
 * programs previously set up on ctx:
 *   - CR_CDM_CONTEXT_*   : the shared-register store (context save) programs,
 *                          one each for buffers A and B;
 *   - CR_CDM_TERMINATE_* : the fence/terminate program;
 *   - CR_CDM_RESUME_*    : the shared-register load (context resume) programs.
 */
static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the store program data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         PVR_DW_TO_BYTES(ctx_switch->sr[0].pds.store_program.data_size);

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      /* data_size is expressed in PDS data size units. */
      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         PVR_DW_TO_BYTES(ctx_switch->sr_fence_terminate_program.data_size);

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      /* data_size is expressed in PDS data size units. */
      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}
1084
pvr_compute_ctx_ws_create_info_init(const struct pvr_compute_ctx * const ctx,enum pvr_winsys_ctx_priority priority,struct pvr_winsys_compute_ctx_create_info * const create_info)1085 static void pvr_compute_ctx_ws_create_info_init(
1086 const struct pvr_compute_ctx *const ctx,
1087 enum pvr_winsys_ctx_priority priority,
1088 struct pvr_winsys_compute_ctx_create_info *const create_info)
1089 {
1090 create_info->priority = priority;
1091
1092 pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
1093 ctx,
1094 &create_info->static_state);
1095 }
1096
/* Create a compute hardware context.
 *
 * Acquires, in order: the host-side context struct, the CDM context
 * resume buffer, the shared-register store/load programs (one set per
 * context-switch buffer), the fence/terminate program, and the reset
 * command, then asks the winsys to create the kernel-side context. On
 * failure every partially-acquired resource is released in reverse
 * order via the goto cleanup chain.
 */
VkResult pvr_compute_ctx_create(struct pvr_device *const device,
                                enum pvr_winsys_ctx_priority priority,
                                struct pvr_compute_ctx **const ctx_out)
{
   struct pvr_winsys_compute_ctx_create_info create_info;
   struct pvr_compute_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   /* CPU-accessible, GPU-uncached buffer the firmware uses to save/restore
    * CDM state across a context switch.
    */
   result = pvr_bo_alloc(
      device,
      device->heaps.general_heap,
      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
      &ctx->ctx_switch.compute_state_bo);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   /* TODO: Change this so that enabling storage to B doesn't change the array
    * size. Instead of looping we could unroll this and have the second
    * programs setup depending on the B enable. Doing it that way would make
    * things more obvious.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
      result = pvr_ctx_sr_programs_setup(device,
                                         PVR_CTX_SR_COMPUTE_TARGET,
                                         &ctx->ctx_switch.sr[i]);
      if (result != VK_SUCCESS) {
         /* Only unwind the entries set up so far. */
         for (uint32_t j = 0; j < i; j++)
            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);

         goto err_free_state_buffer;
      }
   }

   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
      device,
      &ctx->ctx_switch.sr_fence_terminate_program);
   if (result != VK_SUCCESS)
      goto err_free_sr_programs;

   /* ctx must be fully initialized by this point since the create info is
    * packed from its context-switch programs.
    */
   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_pds_fence_terminate_program;

   result = device->ws->ops->compute_ctx_create(device->ws,
                                                &create_info,
                                                &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_pds_fence_terminate_program:
   pvr_bo_suballoc_free(ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);

err_free_sr_programs:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

err_free_state_buffer:
   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
1181
/* Destroy a compute context, tearing down resources in the reverse order
 * of pvr_compute_ctx_create(). The winsys (kernel-side) context is
 * destroyed first so no further GPU use of the buffers can occur.
 */
void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

   pvr_bo_suballoc_free(ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

   vk_free(&device->vk.alloc, ctx);
}
1198
/* Fill in the winsys create info required to create a transfer context.
 * Only the priority is needed; transfer contexts carry no packed static
 * register state.
 */
static void pvr_transfer_ctx_ws_create_info_init(
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_transfer_ctx_create_info *const create_info)
{
   create_info->priority = priority;
}
1205
pvr_transfer_eot_shaders_init(struct pvr_device * device,struct pvr_transfer_ctx * ctx)1206 static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
1207 struct pvr_transfer_ctx *ctx)
1208 {
1209 uint64_t rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS];
1210
1211 /* Setup start indexes of the shared registers that will contain the PBE
1212 * state words for each render target. These must match the indexes used in
1213 * pvr_pds_generate_pixel_event(), which is used to generate the
1214 * corresponding PDS program in pvr_pbe_setup_emit() via
1215 * pvr_pds_generate_pixel_event_data_segment() and
1216 * pvr_pds_generate_pixel_event_code_segment().
1217 */
1218 /* TODO: store the shared register information somewhere so that it can be
1219 * shared with pvr_pbe_setup_emit() rather than having the shared register
1220 * indexes and number of shared registers hard coded in
1221 * pvr_pds_generate_pixel_event().
1222 */
1223 for (uint32_t i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++)
1224 rt_pbe_regs[i] = i * PVR_STATE_PBE_DWORDS;
1225
1226 STATIC_ASSERT(ARRAY_SIZE(rt_pbe_regs) == ARRAY_SIZE(ctx->usc_eot_bos));
1227
1228 for (uint32_t i = 0; i < ARRAY_SIZE(ctx->usc_eot_bos); i++) {
1229 const uint32_t cache_line_size =
1230 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1231 const unsigned rt_count = i + 1;
1232 struct util_dynarray eot_bin;
1233 VkResult result;
1234
1235 pvr_uscgen_tq_eot(rt_count, rt_pbe_regs, &eot_bin);
1236
1237 result = pvr_gpu_upload_usc(device,
1238 util_dynarray_begin(&eot_bin),
1239 eot_bin.size,
1240 cache_line_size,
1241 &ctx->usc_eot_bos[i]);
1242 util_dynarray_fini(&eot_bin);
1243 if (result != VK_SUCCESS) {
1244 for (uint32_t j = 0; j < i; j++)
1245 pvr_bo_suballoc_free(ctx->usc_eot_bos[j]);
1246
1247 return result;
1248 }
1249 }
1250
1251 return VK_SUCCESS;
1252 }
1253
pvr_transfer_eot_shaders_fini(struct pvr_device * device,struct pvr_transfer_ctx * ctx)1254 static void pvr_transfer_eot_shaders_fini(struct pvr_device *device,
1255 struct pvr_transfer_ctx *ctx)
1256 {
1257 for (uint32_t i = 0; i < ARRAY_SIZE(ctx->usc_eot_bos); i++)
1258 pvr_bo_suballoc_free(ctx->usc_eot_bos[i]);
1259 }
1260
pvr_transfer_ctx_shaders_init(struct pvr_device * device,struct pvr_transfer_ctx * ctx)1261 static VkResult pvr_transfer_ctx_shaders_init(struct pvr_device *device,
1262 struct pvr_transfer_ctx *ctx)
1263 {
1264 VkResult result;
1265
1266 result = pvr_transfer_frag_store_init(device, &ctx->frag_store);
1267 if (result != VK_SUCCESS)
1268 goto err_out;
1269
1270 result = pvr_transfer_eot_shaders_init(device, ctx);
1271 if (result != VK_SUCCESS)
1272 goto err_frag_store_fini;
1273
1274 return VK_SUCCESS;
1275
1276 err_frag_store_fini:
1277 pvr_transfer_frag_store_fini(device, &ctx->frag_store);
1278
1279 err_out:
1280 return result;
1281 }
1282
/* Tear down the transfer context shaders in the reverse order of
 * pvr_transfer_ctx_shaders_init().
 */
static void pvr_transfer_ctx_shaders_fini(struct pvr_device *device,
                                          struct pvr_transfer_ctx *ctx)
{
   pvr_transfer_eot_shaders_fini(device, ctx);
   pvr_transfer_frag_store_fini(device, &ctx->frag_store);
}
1289
/* Create a transfer hardware context.
 *
 * Acquires, in order: the (zeroed) host-side context struct, the reset
 * command, the kernel-side winsys context, the transfer shaders, and the
 * PDS uniform/texture state code segments. On failure every
 * partially-acquired resource is released in reverse order via the goto
 * cleanup chain.
 */
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
                                 enum pvr_winsys_ctx_priority priority,
                                 struct pvr_transfer_ctx **const ctx_out)
{
   struct pvr_winsys_transfer_ctx_create_info create_info;
   struct pvr_transfer_ctx *ctx;
   VkResult result;

   /* vk_zalloc: the unitex pvr_bo pointers must start out NULL so the
    * error path below can tell which entries were uploaded.
    */
   ctx = vk_zalloc(&device->vk.alloc,
                   sizeof(*ctx),
                   8U,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);

   result = device->ws->ops->transfer_ctx_create(device->ws,
                                                 &create_info,
                                                 &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   result = pvr_transfer_ctx_shaders_init(device, ctx);
   if (result != VK_SUCCESS)
      goto err_destroy_transfer_ctx;

   /* Create the PDS Uniform/Tex state code segment array. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* The (0, 0) combination has no state to upload. */
         if (i == 0U && j == 0U)
            continue;

         result = pvr_pds_unitex_state_program_create_and_upload(
            device,
            NULL,
            i,
            j,
            &ctx->pds_unitex_code[i][j]);
         if (result != VK_SUCCESS) {
            goto err_free_pds_unitex_bos;
         }
      }
   }

   *ctx_out = ctx;

   return VK_SUCCESS;

err_free_pds_unitex_bos:
   /* Entries never uploaded are still NULL thanks to vk_zalloc(). */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_suballoc_free(ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_shaders_fini(device, ctx);

err_destroy_transfer_ctx:
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
1368
/* Destroy a transfer context, tearing down resources in the reverse
 * order of pvr_transfer_ctx_create().
 */
void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   /* Free the PDS Uniform/Tex state programs; the never-uploaded (0, 0)
    * entry is still NULL and is skipped.
    */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_suballoc_free(ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_shaders_fini(device, ctx);
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   vk_free(&device->vk.alloc, ctx);
}
1387