/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdint.h>
#include <vulkan/vulkan_core.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_clear.h"
#include "pvr_hardcode.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_shader_factory.h"
#include "pvr_static_shaders.h"
#include "pvr_types.h"
#include "vk_alloc.h"
#include "vk_log.h"

static void pvr_device_setup_graphics_static_clear_ppp_base(
   struct pvr_static_clear_ppp_base *const base)
{
   pvr_csb_pack (&base->wclamp, TA_WCLAMP, wclamp) {
      wclamp.val = fui(0.00001f);
   }

   /* clang-format off */
   pvr_csb_pack (&base->varying_word[0], TA_STATE_VARYING0, varying0);
   pvr_csb_pack (&base->varying_word[1], TA_STATE_VARYING1, varying1);
   pvr_csb_pack (&base->varying_word[2], TA_STATE_VARYING2, varying2);
   /* clang-format on */

   pvr_csb_pack (&base->ppp_ctrl, TA_STATE_PPP_CTRL, ppp_ctrl) {
      ppp_ctrl.pretransform = true;
      ppp_ctrl.cullmode = PVRX(TA_CULLMODE_NO_CULLING);
   }

   /* clang-format off */
   pvr_csb_pack (&base->stream_out0, TA_STATE_STREAM_OUT0, stream_out0);
   /* clang-format on */
}

static void pvr_device_setup_graphics_static_clear_ppp_templates(
   struct pvr_static_clear_ppp_template
      templates[static PVR_STATIC_CLEAR_VARIANT_COUNT])
{
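   /* The variant index is treated as a VkImageAspectFlags mask, so
    * templates[i] is the clear variant for that combination of the color,
    * depth and stencil aspects (with index 0 being the "no aspect" case).
    */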
   for (uint32_t i = 0; i < PVR_STATIC_CLEAR_VARIANT_COUNT; i++) {
      const bool has_color = !!(i & VK_IMAGE_ASPECT_COLOR_BIT);
      const bool has_depth = !!(i & VK_IMAGE_ASPECT_DEPTH_BIT);
      const bool has_stencil = !!(i & VK_IMAGE_ASPECT_STENCIL_BIT);

      struct pvr_static_clear_ppp_template *const template = &templates[i];

      template->requires_pds_state = has_color;

      pvr_csb_pack (&template->header, TA_STATE_HEADER, header) {
         header.pres_stream_out_size = true;
         header.pres_ppp_ctrl = true;
         header.pres_varying_word2 = true;
         header.pres_varying_word1 = true;
         header.pres_varying_word0 = true;
         header.pres_outselects = true;
         header.pres_wclamp = true;
         header.pres_region_clip = true;
         header.pres_pds_state_ptr2 = template->requires_pds_state;
         header.pres_pds_state_ptr1 = template->requires_pds_state;
         header.pres_pds_state_ptr0 = template->requires_pds_state;
         header.pres_ispctl_fb = true;
         header.pres_ispctl_fa = true;
         header.pres_ispctl = true;
      }

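/* Convenience macro used below: build a value-initialized PVRX(cs) struct
 * containing only what pvr_cmd_header() provides, so the per-variant fields
 * can then be filled in field by field.
 */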
#define CS_HEADER(cs)     \
   (struct PVRX(cs))      \
   {                      \
      pvr_cmd_header(cs)  \
   }

      template->config.ispctl = CS_HEADER(TA_STATE_ISPCTL);
      template->config.ispctl.tagwritedisable = !has_color;
      template->config.ispctl.bpres = true;

      template->config.ispa = CS_HEADER(TA_STATE_ISPA);
      template->config.ispa.objtype = PVRX(TA_OBJTYPE_TRIANGLE);
      template->config.ispa.passtype = PVRX(TA_PASSTYPE_TRANSLUCENT);
      template->config.ispa.dwritedisable = !has_depth;
      template->config.ispa.dcmpmode = (i == 0) ? PVRX(TA_CMPMODE_NEVER)
                                                : PVRX(TA_CMPMODE_ALWAYS);
      template->config.ispa.sref =
         has_stencil ? PVRX(TA_STATE_ISPA_SREF_SIZE_MAX) : 0;

      pvr_csb_pack (&template->ispb, TA_STATE_ISPB, ispb) {
         ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
         ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
         ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);

         ispb.sop3 = has_stencil ? PVRX(TA_ISPB_STENCILOP_REPLACE)
                                 : PVRX(TA_ISPB_STENCILOP_KEEP);

         ispb.swmask = has_stencil ? 0xFF : 0;
      }

      template->config.pds_state = NULL;

      template->config.region_clip0 = CS_HEADER(TA_REGION_CLIP0);
      template->config.region_clip0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
      template->config.region_clip0.left = 0;
      template->config.region_clip0.right = PVRX(TA_REGION_CLIP_MAX);

      template->config.region_clip1 = CS_HEADER(TA_REGION_CLIP1);
      template->config.region_clip1.top = 0;
      template->config.region_clip1.bottom = PVRX(TA_REGION_CLIP_MAX);

      template->config.output_sel = CS_HEADER(TA_OUTPUT_SEL);
      template->config.output_sel.vtxsize = 4;
      template->config.output_sel.rhw_pres = true;

#undef CS_HEADER
   }
}

/**
 * \brief Emit geom state from a configurable template.
 *
 * Note that the state is emitted by joining the template with a base, so the
 * base must have been set up before calling this.
 *
 * \param[in] csb Control stream to emit to.
 * \param[in] template The configured template.
 * \param[out] pvr_bo_out Uploaded state's pvr_bo object.
 *
 * \return VK_SUCCESS if the state was successfully uploaded.
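 *
 * For illustration only: a caller would pick one of the templates set up by
 * pvr_device_setup_graphics_static_clear_ppp_templates() by aspect mask and
 * emit it, e.g. for a depth-only clear (color variants additionally need
 * config.pds_state pointed at valid PDS state words first):
 *
 *    struct pvr_suballoc_bo *ppp_bo;
 *    VkResult result = pvr_emit_ppp_from_template(
 *       csb,
 *       &device->static_clear_state.ppp_templates[VK_IMAGE_ASPECT_DEPTH_BIT],
 *       &ppp_bo);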
 */
VkResult pvr_emit_ppp_from_template(
   struct pvr_csb *const csb,
   const struct pvr_static_clear_ppp_template *const template,
   struct pvr_suballoc_bo **const pvr_bo_out)
{
   const uint32_t dword_count =
      pvr_cmd_length(TA_STATE_HEADER) + pvr_cmd_length(TA_STATE_ISPCTL) +
      pvr_cmd_length(TA_STATE_ISPA) + pvr_cmd_length(TA_STATE_ISPB) +
      (template->requires_pds_state ? PVR_STATIC_CLEAR_PDS_STATE_COUNT : 0) +
      pvr_cmd_length(TA_REGION_CLIP0) + pvr_cmd_length(TA_REGION_CLIP1) +
      pvr_cmd_length(TA_WCLAMP) + pvr_cmd_length(TA_OUTPUT_SEL) +
      pvr_cmd_length(TA_STATE_VARYING0) + pvr_cmd_length(TA_STATE_VARYING1) +
      pvr_cmd_length(TA_STATE_VARYING2) + pvr_cmd_length(TA_STATE_PPP_CTRL) +
      pvr_cmd_length(TA_STATE_STREAM_OUT0);

   struct pvr_device *const device = csb->device;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   const struct pvr_static_clear_ppp_base *const base =
      &device->static_clear_state.ppp_base;
   struct pvr_suballoc_bo *pvr_bo;
   uint32_t *stream;
   VkResult result;

   result = pvr_bo_suballoc(&device->suballoc_general,
                            PVR_DW_TO_BYTES(dword_count),
                            cache_line_size,
                            false,
                            &pvr_bo);
   if (result != VK_SUCCESS) {
      *pvr_bo_out = NULL;
      return result;
   }

   stream = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);

   pvr_csb_write_value(stream, TA_STATE_HEADER, template->header);
   pvr_csb_write_struct(stream, TA_STATE_ISPCTL, &template->config.ispctl);
   pvr_csb_write_struct(stream, TA_STATE_ISPA, &template->config.ispa);
   pvr_csb_write_value(stream, TA_STATE_ISPB, template->ispb);

   if (template->requires_pds_state) {
      static_assert(sizeof(*stream) ==
                       sizeof((*template->config.pds_state)[0]),
                    "Size mismatch");
      for (uint32_t i = 0; i < PVR_STATIC_CLEAR_PDS_STATE_COUNT; i++)
         *stream++ = (*template->config.pds_state)[i];
   }

   pvr_csb_write_struct(stream,
                        TA_REGION_CLIP0,
                        &template->config.region_clip0);
   pvr_csb_write_struct(stream,
                        TA_REGION_CLIP1,
                        &template->config.region_clip1);
   pvr_csb_write_value(stream, TA_WCLAMP, base->wclamp);
   pvr_csb_write_struct(stream, TA_OUTPUT_SEL, &template->config.output_sel);
   pvr_csb_write_value(stream, TA_STATE_VARYING0, base->varying_word[0]);
   pvr_csb_write_value(stream, TA_STATE_VARYING1, base->varying_word[1]);
   pvr_csb_write_value(stream, TA_STATE_VARYING2, base->varying_word[2]);
   pvr_csb_write_value(stream, TA_STATE_PPP_CTRL, base->ppp_ctrl);
   pvr_csb_write_value(stream, TA_STATE_STREAM_OUT0, base->stream_out0);

   assert((uint64_t)(stream - (uint32_t *)pvr_bo_suballoc_get_map_addr(
             pvr_bo)) == dword_count);

   stream = NULL;

   pvr_csb_set_relocation_mark(csb);

   pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state) {
      state.word_count = dword_count;
      state.addrmsb = pvr_bo->dev_addr;
   }

   pvr_csb_emit (csb, VDMCTRL_PPP_STATE1, state) {
      state.addrlsb = pvr_bo->dev_addr;
   }

   pvr_csb_clear_relocation_mark(csb);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

static VkResult
pvr_device_init_clear_attachment_programs(struct pvr_device *device)
{
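   /* Both the texture and the pixel PDS programs are packed into the same PDS
    * suballocation below, so every program offset is aligned to whichever of
    * the two base-address alignment requirements is stricter.
    */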
   const uint32_t pds_prog_alignment =
      MAX2(PVRX(TA_STATE_PDS_TEXUNICODEBASE_ADDR_ALIGNMENT),
           PVRX(TA_STATE_PDS_SHADERBASE_ADDR_ALIGNMENT));
   struct pvr_device_static_clear_state *clear_state =
      &device->static_clear_state;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   uint32_t pds_texture_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
   uint32_t pds_pixel_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
   uint32_t usc_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
   uint64_t usc_upload_offset;
   uint64_t pds_upload_offset;
   uint32_t alloc_size = 0;
   VkResult result;
   uint8_t *ptr;

#if !defined(NDEBUG)
   uint32_t clear_attachment_info_count = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(clear_attachment_collection); i++) {
      if (!clear_attachment_collection[i].info)
         continue;

      clear_attachment_info_count++;
   }

   assert(clear_attachment_info_count == PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT);
#endif

   /* Upload USC fragment shaders. */

   for (uint32_t i = 0, offset_idx = 0;
        i < ARRAY_SIZE(clear_attachment_collection);
        i++) {
      if (!clear_attachment_collection[i].info)
         continue;

      usc_program_offsets[offset_idx] = alloc_size;
      /* TODO: The compiler will likely give us a pre-aligned size for the USC
       * shader so don't bother aligning here when it's hooked up.
       */
      alloc_size += ALIGN_POT(clear_attachment_collection[i].size, 4);

      offset_idx++;
   }

   result = pvr_bo_suballoc(&device->suballoc_usc,
                            alloc_size,
                            4,
                            false,
                            &clear_state->usc_clear_attachment_programs);
   if (result != VK_SUCCESS)
      return result;

   usc_upload_offset =
      clear_state->usc_clear_attachment_programs->dev_addr.addr -
      device->heaps.usc_heap->base_addr.addr;
   ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(
      clear_state->usc_clear_attachment_programs);

   for (uint32_t i = 0, offset_idx = 0;
        i < ARRAY_SIZE(clear_attachment_collection);
        i++) {
      if (!clear_attachment_collection[i].info)
         continue;

      memcpy(ptr + usc_program_offsets[offset_idx],
             clear_attachment_collection[i].code,
             clear_attachment_collection[i].size);

      offset_idx++;
   }

   /* Upload PDS programs. */

   alloc_size = 0;

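   /* First pass: compute per-program offsets and the total allocation size.
    * The actual program code and data segments are generated into the PDS
    * suballocation in the second pass further down.
    */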
   for (uint32_t i = 0, offset_idx = 0;
        i < ARRAY_SIZE(clear_attachment_collection);
        i++) {
      struct pvr_pds_pixel_shader_sa_program texture_pds_program;
      struct pvr_pds_kickusc_program pixel_shader_pds_program;
      uint32_t program_size;

      if (!clear_attachment_collection[i].info)
         continue;

      /* Texture program to load colors. */

      texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
         .num_texture_dma_kicks = 1,
      };

      pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program);

      pds_texture_program_offsets[offset_idx] = alloc_size;
      alloc_size += ALIGN_POT(PVR_DW_TO_BYTES(texture_pds_program.code_size),
                              pds_prog_alignment);

      /* Pixel program to load fragment shader. */

      pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };

      pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
                          usc_upload_offset + usc_program_offsets[offset_idx],
                          clear_attachment_collection[i].info->temps_required,
                          PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                          false);

      pvr_pds_set_sizes_pixel_shader(&pixel_shader_pds_program);

      program_size = pixel_shader_pds_program.code_size +
                     pixel_shader_pds_program.data_size;
      program_size = PVR_DW_TO_BYTES(program_size);

      pds_pixel_program_offsets[offset_idx] = alloc_size;
      alloc_size += ALIGN_POT(program_size, pds_prog_alignment);

      offset_idx++;
   }

   result = pvr_bo_suballoc(&device->suballoc_pds,
                            alloc_size,
                            pds_prog_alignment,
                            false,
                            &clear_state->pds_clear_attachment_programs);
   if (result != VK_SUCCESS) {
      pvr_bo_suballoc_free(clear_state->usc_clear_attachment_programs);
      return result;
   }

   pds_upload_offset =
      clear_state->pds_clear_attachment_programs->dev_addr.addr -
      device->heaps.pds_heap->base_addr.addr;
   ptr =
      pvr_bo_suballoc_get_map_addr(clear_state->pds_clear_attachment_programs);

   for (uint32_t i = 0, offset_idx = 0;
        i < ARRAY_SIZE(clear_attachment_collection);
        i++) {
      struct pvr_pds_pixel_shader_sa_program texture_pds_program;
      struct pvr_pds_kickusc_program pixel_shader_pds_program;

      if (!clear_attachment_collection[i].info) {
         clear_state->pds_clear_attachment_program_info[i] =
            (struct pvr_pds_clear_attachment_program_info){ 0 };

         continue;
      }

      /* Texture program to load colors. */

      texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
         .num_texture_dma_kicks = 1,
      };

      pvr_pds_generate_pixel_shader_sa_code_segment(
         &texture_pds_program,
         (uint32_t *)(ptr + pds_texture_program_offsets[offset_idx]));

      /* Pixel program to load fragment shader. */

      pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };

      pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
                          usc_upload_offset + usc_program_offsets[offset_idx],
                          clear_attachment_collection[i].info->temps_required,
                          PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                          false);

      pvr_pds_generate_pixel_shader_program(
         &pixel_shader_pds_program,
         (uint32_t *)(ptr + pds_pixel_program_offsets[offset_idx]));

      /* Setup the PDS program info. */

      pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program,
                                                     dev_info);

      clear_state->pds_clear_attachment_program_info[i] =
         (struct pvr_pds_clear_attachment_program_info){
            .texture_program_offset = PVR_DEV_ADDR(
               pds_upload_offset + pds_texture_program_offsets[offset_idx]),
            .pixel_program_offset = PVR_DEV_ADDR(
               pds_upload_offset + pds_pixel_program_offsets[offset_idx]),

            .texture_program_pds_temps_count = texture_pds_program.temps_used,
            .texture_program_data_size = texture_pds_program.data_size,
         };

      offset_idx++;
   }

   return VK_SUCCESS;
}

static void
pvr_device_finish_clear_attachment_programs(struct pvr_device *device)
{
   struct pvr_device_static_clear_state *clear_state =
      &device->static_clear_state;

   pvr_bo_suballoc_free(clear_state->usc_clear_attachment_programs);
   pvr_bo_suballoc_free(clear_state->pds_clear_attachment_programs);
}

/**
 * \brief Generates and uploads the vertices required to clear the rect area.
 *
 * We use the triangle strip topology for clears, so this function generates 4
 * vertices to represent the rect. Note that the coordinates are in screen
 * space and not NDC.
 *
 * \param[in] device Device to upload to.
 * \param[in] rect Area to clear.
 * \param[in] depth Depth (i.e. Z coordinate) of the area to clear.
 * \param[out] pvr_bo_out BO upload object.
 * \return VK_SUCCESS if the upload succeeded.
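 *
 * The vertices below are emitted in strip order, i.e. (x0, y0), (x0, y1),
 * (x1, y0), (x1, y1), which the clear VDM state consumes as a two-triangle
 * strip (the single-triangle clear variant only consumes the first 3).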
 */
VkResult pvr_clear_vertices_upload(struct pvr_device *device,
                                   const VkRect2D *rect,
                                   float depth,
                                   struct pvr_suballoc_bo **const pvr_bo_out)
{
   const float y1 = (float)(rect->offset.y + rect->extent.height);
   const float x1 = (float)(rect->offset.x + rect->extent.width);
   const float y0 = (float)rect->offset.y;
   const float x0 = (float)rect->offset.x;

   const float vertices[PVR_CLEAR_VERTEX_COUNT][PVR_CLEAR_VERTEX_COORDINATES] = {
      [0] = { [0] = x0, [1] = y0, [2] = depth },
      [1] = { [0] = x0, [1] = y1, [2] = depth },
      [2] = { [0] = x1, [1] = y0, [2] = depth },
      [3] = { [0] = x1, [1] = y1, [2] = depth }
   };

   return pvr_gpu_upload(device,
                         device->heaps.general_heap,
                         vertices,
                         sizeof(vertices),
                         4,
                         pvr_bo_out);
}

VkResult pvr_device_init_graphics_static_clear_state(struct pvr_device *device)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const VkRect2D vf_rect = {
      .offset = { .x = 0, .y = 0 },
      .extent = { .width = rogue_get_param_vf_max_x(dev_info),
                  .height = rogue_get_param_vf_max_y(dev_info) }
   };
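   /* The static clear vertices uploaded below cover this maximum viewport, so
    * the same vertex buffer can be reused for clears of any render area; the
    * normal vs. large VDM words packed further down merely pass 3 or 4
    * indices into it.
    */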

   const uint32_t vdm_state_size_in_dw =
      pvr_clear_vdm_state_get_size_in_dw(dev_info, 1);
   struct pvr_device_static_clear_state *state = &device->static_clear_state;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   struct pvr_pds_vertex_shader_program pds_program;
   struct util_dynarray passthrough_vert_shader;
   uint32_t *state_buffer;
   VkResult result;

   if (PVR_HAS_FEATURE(dev_info, gs_rta_support)) {
      struct util_dynarray passthrough_rta_vert_shader;

      util_dynarray_init(&passthrough_rta_vert_shader, NULL);
      pvr_hard_code_get_passthrough_rta_vertex_shader(
         dev_info,
         &passthrough_rta_vert_shader);

      result = pvr_gpu_upload_usc(device,
                                  passthrough_rta_vert_shader.data,
                                  passthrough_rta_vert_shader.size,
                                  cache_line_size,
                                  &state->usc_multi_layer_vertex_shader_bo);
      if (result != VK_SUCCESS) {
         util_dynarray_fini(&passthrough_rta_vert_shader);
         return result;
      }

      util_dynarray_fini(&passthrough_rta_vert_shader);
   } else {
      state->usc_multi_layer_vertex_shader_bo = NULL;
   }

   util_dynarray_init(&passthrough_vert_shader, NULL);
   pvr_hard_code_get_passthrough_vertex_shader(dev_info,
                                               &passthrough_vert_shader);

   result = pvr_gpu_upload_usc(device,
                               passthrough_vert_shader.data,
                               passthrough_vert_shader.size,
                               cache_line_size,
                               &state->usc_vertex_shader_bo);
   util_dynarray_fini(&passthrough_vert_shader);
   if (result != VK_SUCCESS)
      goto err_free_usc_multi_layer_shader;

   result =
      pvr_clear_vertices_upload(device, &vf_rect, 0.0f, &state->vertices_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_shader;

   pvr_pds_clear_vertex_shader_program_init_base(&pds_program,
                                                 state->usc_vertex_shader_bo);

   result =
      pvr_pds_clear_vertex_shader_program_create_and_upload(&pds_program,
                                                            device,
                                                            state->vertices_bo,
                                                            &state->pds);
   if (result != VK_SUCCESS)
      goto err_free_vertices_buffer;

   pvr_device_setup_graphics_static_clear_ppp_base(&state->ppp_base);
   pvr_device_setup_graphics_static_clear_ppp_templates(state->ppp_templates);

   assert(pds_program.code_size <= state->pds.code_size);

   state_buffer = vk_alloc(&device->vk.alloc,
                           PVR_DW_TO_BYTES(vdm_state_size_in_dw * 2),
                           8,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (state_buffer == NULL) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_pds_program;
   }

   /* TODO: The difference between the large and normal words is only the last
    * word. The value is 3 or 4 depending on the number of indices. Should we
    * dedup this?
    */

   /* The large clear state words cover the max framebuffer. The normal clear
    * state words cover only half of it (since 3 indices are passed, forming a
    * single triangle, instead of 4) and are used when the render area fits
    * within a quarter of the max framebuffer, i.e. fits within the single
    * triangle.
    */
   /* 4 * sizeof(uint32_t) because of the 4 pixel output regs. */
   /* TODO: Replace 4 * sizeof(uint32_t) with a define from the compiler or
    * hook up the value directly to it using some compiler info.
    */
   pvr_pack_clear_vdm_state(&device->pdevice->dev_info,
                            &state->pds,
                            pds_program.temps_used,
                            3,
                            4 * sizeof(uint32_t),
                            1,
                            state_buffer);
   state->vdm_words = state_buffer;
   state_buffer += vdm_state_size_in_dw;

   pvr_pack_clear_vdm_state(&device->pdevice->dev_info,
                            &state->pds,
                            pds_program.temps_used,
                            4,
                            4 * sizeof(uint32_t),
                            1,
                            state_buffer);
   state->large_clear_vdm_words = state_buffer;

   result = pvr_device_init_clear_attachment_programs(device);
   if (result != VK_SUCCESS)
      goto err_free_vdm_state;

   return VK_SUCCESS;

err_free_vdm_state:
   /* Cast away the const :( */
   vk_free(&device->vk.alloc, (void *)state->vdm_words);

err_free_pds_program:
   pvr_bo_suballoc_free(state->pds.pvr_bo);

err_free_vertices_buffer:
   pvr_bo_suballoc_free(state->vertices_bo);

err_free_usc_shader:
   pvr_bo_suballoc_free(state->usc_vertex_shader_bo);

err_free_usc_multi_layer_shader:
   pvr_bo_suballoc_free(state->usc_multi_layer_vertex_shader_bo);

   return result;
}

void pvr_device_finish_graphics_static_clear_state(struct pvr_device *device)
{
   struct pvr_device_static_clear_state *state = &device->static_clear_state;

   pvr_device_finish_clear_attachment_programs(device);

   /* Don't free `large_clear_vdm_words` since it was allocated together with
    * `vdm_words`.
    */
   /* Cast away the const :( */
   vk_free(&device->vk.alloc, (void *)state->vdm_words);

   pvr_bo_suballoc_free(state->pds.pvr_bo);
   pvr_bo_suballoc_free(state->vertices_bo);
   pvr_bo_suballoc_free(state->usc_vertex_shader_bo);
   pvr_bo_suballoc_free(state->usc_multi_layer_vertex_shader_bo);
}

void pvr_pds_clear_vertex_shader_program_init_base(
   struct pvr_pds_vertex_shader_program *program,
   const struct pvr_suballoc_bo *usc_shader_bo)
{
   *program = (struct pvr_pds_vertex_shader_program){
      .num_streams = 1,
      .streams = {
         [0] = {
            /* We'll get this from this interface's client when generating the
             * data segment. This will be the address of the vertex buffer.
             */
            .address = 0,
            .stride = PVR_CLEAR_VERTEX_COORDINATES * sizeof(uint32_t),
            .num_elements = 1,
            .elements = {
               [0] = {
                  .size = PVR_CLEAR_VERTEX_COUNT * PVR_CLEAR_VERTEX_COORDINATES,
               },
            },
         },
      },
   };

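   /* Hand the passthrough USC vertex shader to the PDS DOUTU task below. The
    * temp count is 0 here, unlike the clear attachment programs above which
    * pass the shader's temps_required.
    */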
   pvr_pds_setup_doutu(&program->usc_task_control,
                       usc_shader_bo->dev_addr.addr,
                       0,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);
}

VkResult pvr_pds_clear_vertex_shader_program_create_and_upload(
   struct pvr_pds_vertex_shader_program *program,
   struct pvr_device *device,
   const struct pvr_suballoc_bo *vertices_bo,
   struct pvr_pds_upload *const upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   program->streams[0].address = vertices_bo->dev_addr.addr;

   pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size =
      PVR_DW_TO_BYTES(program->code_size + program->data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_exit;
   }

   pvr_pds_vertex_shader(program,
                         staging_buffer,
                         PDS_GENERATE_DATA_SEGMENT,
                         dev_info);
   pvr_pds_vertex_shader(program,
                         &staging_buffer[program->data_size],
                         PDS_GENERATE_CODE_SEGMENT,
                         dev_info);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program->data_size,
                               16,
                               &staging_buffer[program->data_size],
                               program->code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);
   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_exit:
   *upload_out = (struct pvr_pds_upload){ 0 };
   return result;
}

VkResult pvr_pds_clear_vertex_shader_program_create_and_upload_data(
   struct pvr_pds_vertex_shader_program *program,
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_suballoc_bo *vertices_bo,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   program->streams[0].address = vertices_bo->dev_addr.addr;

   pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program->data_size);

   staging_buffer = vk_alloc(&cmd_buffer->device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      *pds_upload_out = (struct pvr_pds_upload){ 0 };

      return vk_command_buffer_set_error(&cmd_buffer->vk,
                                         VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   pvr_pds_vertex_shader(program,
                         staging_buffer,
                         PDS_GENERATE_DATA_SEGMENT,
                         dev_info);

   result = pvr_cmd_buffer_upload_pds(cmd_buffer,
                                      staging_buffer,
                                      program->data_size,
                                      4,
                                      NULL,
                                      0,
                                      0,
                                      4,
                                      pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);

      *pds_upload_out = (struct pvr_pds_upload){ 0 };

      return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
   }

   vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

void pvr_pds_clear_rta_vertex_shader_program_init_base(
   struct pvr_pds_vertex_shader_program *program,
   const struct pvr_suballoc_bo *usc_shader_bo)
{
   pvr_pds_clear_vertex_shader_program_init_base(program, usc_shader_bo);

   /* We'll set the render target index to be the instance id + base array
    * layer. Since the base array layer can change in between clear rects, we
    * don't set it here and ask for it when generating the code and data
    * section.
    */
   /* This is 3 because the instance id register will follow the xyz coordinate
    * registers in the register file.
    * TODO: Maybe we want this to be hooked up to the compiler?
    */
   program->iterate_instance_id = true;
   program->instance_id_register = 3;
}

VkResult pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
   struct pvr_pds_vertex_shader_program *program,
   struct pvr_cmd_buffer *cmd_buffer,
   uint32_t base_array_layer,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   program->instance_id_modifier = base_array_layer;

   pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program->code_size);

   staging_buffer = vk_alloc(&cmd_buffer->device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      *pds_upload_out = (struct pvr_pds_upload){ 0 };

      return vk_command_buffer_set_error(&cmd_buffer->vk,
                                         VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   pvr_pds_vertex_shader(program,
                         staging_buffer,
                         PDS_GENERATE_CODE_SEGMENT,
                         dev_info);

   result = pvr_cmd_buffer_upload_pds(cmd_buffer,
                                      NULL,
                                      0,
                                      0,
                                      staging_buffer,
                                      program->code_size,
                                      4,
                                      4,
                                      pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);

      *pds_upload_out = (struct pvr_pds_upload){ 0 };

      return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
   }

   vk_free(&cmd_buffer->device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

/**
 * Pack VDM control stream words for clear.
 *
 * The provided `state_buffer` is expected to point to a buffer of the size
 * (in dwords) returned by `pvr_clear_vdm_state_get_size_in_dw()`.
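 *
 * For reference, pvr_device_init_graphics_static_clear_state() above packs
 * the single-layer clear words roughly as follows (3 indices for the normal
 * words, 4 for the large ones):
 *
 *    pvr_pack_clear_vdm_state(dev_info, &state->pds, pds_program.temps_used,
 *                             3, 4 * sizeof(uint32_t), 1, state_buffer);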
 */
void pvr_pack_clear_vdm_state(const struct pvr_device_info *const dev_info,
                              const struct pvr_pds_upload *const program,
                              uint32_t temps,
                              uint32_t index_count,
                              uint32_t vs_output_size_in_bytes,
                              uint32_t layer_count,
                              uint32_t *const state_buffer)
{
   const uint32_t vs_output_size =
      DIV_ROUND_UP(vs_output_size_in_bytes,
                   PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE));
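   /* Without gs_rta_support, multi-layer clears are done by instancing the
    * clear geometry once per layer, so the index list below needs an explicit
    * instance count.
    */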
   const bool needs_instance_count =
      !PVR_HAS_FEATURE(dev_info, gs_rta_support) && layer_count > 1;
   uint32_t *stream = state_buffer;
   uint32_t max_instances;
   uint32_t cam_size;

   /* The layer count should be at least 1. For vkCmdClearAttachments() the
    * spec guarantees that the layer count is not 0.
    */
   assert(layer_count != 0);

   pvr_calculate_vertex_cam_size(dev_info,
                                 vs_output_size,
                                 true,
                                 &cam_size,
                                 &max_instances);

   pvr_csb_pack (stream, VDMCTRL_VDM_STATE0, state0) {
      state0.vs_data_addr_present = true;
      state0.vs_other_present = true;
      state0.cam_size = cam_size;
      state0.uvs_scratch_size_select =
         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);
      state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0);
   }
   stream += pvr_cmd_length(VDMCTRL_VDM_STATE0);

   pvr_csb_pack (stream, VDMCTRL_VDM_STATE2, state2) {
      state2.vs_pds_data_base_addr = PVR_DEV_ADDR(program->data_offset);
   }
   stream += pvr_cmd_length(VDMCTRL_VDM_STATE2);

   pvr_csb_pack (stream, VDMCTRL_VDM_STATE3, state3) {
      state3.vs_pds_code_base_addr = PVR_DEV_ADDR(program->code_offset);
   }
   stream += pvr_cmd_length(VDMCTRL_VDM_STATE3);

   pvr_csb_pack (stream, VDMCTRL_VDM_STATE4, state4) {
      state4.vs_output_size = vs_output_size;
   }
   stream += pvr_cmd_length(VDMCTRL_VDM_STATE4);

   pvr_csb_pack (stream, VDMCTRL_VDM_STATE5, state5) {
      state5.vs_max_instances = max_instances;
      /* This is the size of the input vertex. The hw manages the USC
       * temporaries separately so we don't need to include them here.
       */
      state5.vs_usc_unified_size =
         DIV_ROUND_UP(PVR_CLEAR_VERTEX_COORDINATES * sizeof(uint32_t),
                      PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
      state5.vs_pds_temp_size =
         DIV_ROUND_UP(temps,
                      PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
      state5.vs_pds_data_size =
         DIV_ROUND_UP(PVR_DW_TO_BYTES(program->data_size),
                      PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
   }
   stream += pvr_cmd_length(VDMCTRL_VDM_STATE5);

   /* TODO: Here we're doing another state update. If emitting directly to the
    * control stream, we don't mark them as separate state updates by setting
    * the relocation mark so we might be wasting a little bit of memory. See if
    * it's worth changing the code to use the relocation mark.
    */

   pvr_csb_pack (stream, VDMCTRL_INDEX_LIST0, index_list0) {
      index_list0.index_count_present = true;
      index_list0.index_instance_count_present = needs_instance_count;
      index_list0.primitive_topology =
         PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP);
   }
   stream += pvr_cmd_length(VDMCTRL_INDEX_LIST0);

   pvr_csb_pack (stream, VDMCTRL_INDEX_LIST2, index_list2) {
      index_list2.index_count = index_count;
   }
   stream += pvr_cmd_length(VDMCTRL_INDEX_LIST2);

   if (needs_instance_count) {
      pvr_csb_pack (stream, VDMCTRL_INDEX_LIST3, index_list3) {
         index_list3.instance_count = layer_count - 1;
      }
      stream += pvr_cmd_length(VDMCTRL_INDEX_LIST3);
   }

   assert((uint64_t)(stream - state_buffer) ==
          pvr_clear_vdm_state_get_size_in_dw(dev_info, layer_count));
}