1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "pvr_types.h"
36 #include "util/log.h"
37 #include "util/macros.h"
38
/* Register operand helpers.
 *
 * Each macro offsets the register index x by the matching
 * PVR_ROGUE_PDSINST_*_LOWER constant. The _C/_T/_P suffix selects the CONST,
 * TEMP or PTEMP constant; the R32/R32TP/R64/R64TP prefix selects the REGS32,
 * REGS32TP, REGS64 or REGS64TP family of constants.
 */
#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)

#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)

#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)

#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)

/* 32-bit PTemp index for draw indirect base instance. */
#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U

/* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */
#define PVR_PDS_DDMAD_NUM_CONSTS 8
58
#if defined(TRACE_PDS)
/* Pretty-printing helpers used to trace PDS program generation.
 *
 * All of them forward to mesa_logd() when TRACE_PDS is defined and expand to
 * nothing otherwise, so their arguments must not have required side effects.
 */

#   define pvr_debug_pds_const(reg, size, annotation) \
      mesa_logd("const[%d] @ (%dbits) %s", reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation) \
      mesa_logd("temp[%d] @ (%dbits) %s", reg, size, annotation)
#   define pvr_debug_pds_note(...) mesa_logd(" // " __VA_ARGS__)
/* Logs the name of 'flag' when every bit of 'flag' is set in 'flags'.
 * Wrapped in do/while(0) so that it behaves as a single statement (safe in
 * front of an 'else'), and the arguments are parenthesised so that compound
 * expressions are tested as a whole.
 */
#   define pvr_debug_pds_flag(flags, flag)      \
      do {                                      \
         if (((flags) & (flag)) == (flag))      \
            mesa_logd(" > " #flag);             \
      } while (0)
#   define pvr_debug(annotation) mesa_logd(annotation)

#else
#   define pvr_debug_pds_const(reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation)
#   define pvr_debug_pds_note(...)
#   define pvr_debug_pds_flag(flags, flag)
#   define pvr_debug(annotation)
#endif
81
/* Cursor used while appending variable-sized entries to the constant map
 * stored in a struct pvr_pds_info.
 */
struct pvr_pds_const_map_entry_write_state {
   /* Info structure whose 'entries' buffer is being written; its
    * entries_size_in_bytes is the limit checked when preparing an entry.
    */
   const struct pvr_pds_info *PDS_info;
   /* Entry most recently handed out (initially the start of the buffer). */
   struct pvr_const_map_entry *entry;
   /* Size of that entry; the cursor advances by this much on the next
    * request. Zero before the first entry is prepared.
    */
   size_t size_of_last_entry_in_bytes;
   /* Number of entries prepared so far. */
   uint32_t entry_count;
   /* Sum of the sizes of all entries prepared so far. */
   size_t entries_size_in_bytes;
};
89
pvr_init_pds_const_map_entry_write_state(struct pvr_pds_info * PDS_info,struct pvr_pds_const_map_entry_write_state * entry_write_state)90 static void pvr_init_pds_const_map_entry_write_state(
91 struct pvr_pds_info *PDS_info,
92 struct pvr_pds_const_map_entry_write_state *entry_write_state)
93 {
94 entry_write_state->PDS_info = PDS_info;
95 entry_write_state->entry = PDS_info->entries;
96 entry_write_state->size_of_last_entry_in_bytes = 0;
97 entry_write_state->entry_count = 0;
98 entry_write_state->entries_size_in_bytes = 0;
99 }
100
101 /* Returns a pointer to the next struct pvr_const_map_entry. */
pvr_prepare_next_pds_const_map_entry(struct pvr_pds_const_map_entry_write_state * entry_write_state,size_t size_of_next_entry_in_bytes)102 static void *pvr_prepare_next_pds_const_map_entry(
103 struct pvr_pds_const_map_entry_write_state *entry_write_state,
104 size_t size_of_next_entry_in_bytes)
105 {
106 /* Move on to the next entry. */
107 uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
108 entry_write_state->size_of_last_entry_in_bytes);
109 entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
110
111 entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
112 entry_write_state->entry_count++;
113 entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
114
115 /* Check if we can write into the next entry. */
116 assert(entry_write_state->entries_size_in_bytes <=
117 entry_write_state->PDS_info->entries_size_in_bytes);
118
119 return entry_write_state->entry;
120 }
121
pvr_write_pds_const_map_entry_vertex_attribute_address(struct pvr_pds_const_map_entry_write_state * entry_write_state,const struct pvr_pds_vertex_dma * DMA,uint32_t const_val,bool use_robust_vertex_fetch)122 static void pvr_write_pds_const_map_entry_vertex_attribute_address(
123 struct pvr_pds_const_map_entry_write_state *entry_write_state,
124 const struct pvr_pds_vertex_dma *DMA,
125 uint32_t const_val,
126 bool use_robust_vertex_fetch)
127 {
128 pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
129 DMA->size_in_dwords,
130 DMA->stride,
131 DMA->offset,
132 DMA->binding_index);
133
134 if (use_robust_vertex_fetch) {
135 struct pvr_const_map_entry_robust_vertex_attribute_address
136 *robust_attribute_entry;
137
138 robust_attribute_entry =
139 pvr_prepare_next_pds_const_map_entry(entry_write_state,
140 sizeof(*robust_attribute_entry));
141 robust_attribute_entry->type =
142 PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
143 robust_attribute_entry->const_offset = const_val;
144 robust_attribute_entry->binding_index = DMA->binding_index;
145 robust_attribute_entry->component_size_in_bytes =
146 DMA->component_size_in_bytes;
147 robust_attribute_entry->offset = DMA->offset;
148 robust_attribute_entry->stride = DMA->stride;
149 robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
150 robust_attribute_entry->robustness_buffer_offset =
151 DMA->robustness_buffer_offset;
152 } else {
153 struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
154
155 attribute_entry =
156 pvr_prepare_next_pds_const_map_entry(entry_write_state,
157 sizeof(*attribute_entry));
158 attribute_entry->type =
159 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
160 attribute_entry->const_offset = const_val;
161 attribute_entry->binding_index = DMA->binding_index;
162 attribute_entry->offset = DMA->offset;
163 attribute_entry->stride = DMA->stride;
164 attribute_entry->size_in_dwords = DMA->size_in_dwords;
165 }
166 }
167
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)168 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
169 uint32_t end,
170 uint32_t src0)
171 {
172 return pvr_pds_inst_encode_dout(cc,
173 end,
174 0,
175 src0,
176 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
177 }
178
179 static uint32_t
pvr_encode_burst(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_dma,bool halt,unsigned int const32,unsigned int const64,unsigned int dma_size_in_dwords,unsigned int destination,unsigned int store)180 pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
181 bool last_dma,
182 bool halt,
183 unsigned int const32,
184 unsigned int const64,
185 unsigned int dma_size_in_dwords,
186 unsigned int destination,
187 unsigned int store)
188 {
189 uint32_t literal_value;
190
191 /* Encode literal value. */
192 literal_value = dma_size_in_dwords
193 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
194 literal_value |= destination
195 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
196 literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
197 store;
198
199 if (last_dma)
200 literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
201
202 /* Create const map entry. */
203 struct pvr_const_map_entry_literal32 *literal_entry;
204
205 literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
206 sizeof(*literal_entry));
207 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
208 literal_entry->const_offset = const32;
209 literal_entry->literal_value = literal_value;
210
211 /* Encode DOUTD */
212 return pvr_pds_inst_encode_dout(0,
213 halt,
214 R32_C(const32),
215 R64_C(const64),
216 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218
/* Convenience wrapper around pvr_encode_burst() that fixes the 'store'
 * argument to PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE; all
 * other arguments are forwarded unchanged.
 */
#define pvr_encode_burst_cs(psDataEntry,                          \
                            last_dma,                             \
                            halt,                                 \
                            const32,                              \
                            const64,                              \
                            dma_size_in_dwords,                   \
                            destination)                          \
   pvr_encode_burst(                                              \
      psDataEntry,                                                \
      last_dma,                                                   \
      halt,                                                       \
      const32,                                                    \
      const64,                                                    \
      dma_size_in_dwords,                                         \
      destination,                                                \
      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
235
pvr_encode_direct_write(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_dma,bool halt,unsigned int const32,unsigned int const64,uint32_t data_mask,unsigned int destination,uint32_t destination_store,const struct pvr_device_info * dev_info)236 static uint32_t pvr_encode_direct_write(
237 struct pvr_pds_const_map_entry_write_state *entry_write_state,
238 bool last_dma,
239 bool halt,
240 unsigned int const32,
241 unsigned int const64,
242 uint32_t data_mask,
243 unsigned int destination,
244 uint32_t destination_store,
245 const struct pvr_device_info *dev_info)
246 {
247 struct pvr_const_map_entry_literal32 *literal_entry;
248
249 uint32_t instruction =
250 pvr_pds_inst_encode_dout(0,
251 halt,
252 const32,
253 const64,
254 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
255
256 literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
257 sizeof(*literal_entry));
258 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
259 literal_entry->const_offset = const32;
260 literal_entry->literal_value = destination_store;
261
262 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
263 literal_entry->literal_value |=
264 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
265 }
266
267 literal_entry->literal_value |=
268 destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
269
270 if (data_mask == 0x1) {
271 literal_entry->literal_value |=
272 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
273 } else if (data_mask == 0x2) {
274 literal_entry->literal_value |=
275 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
276 } else {
277 literal_entry->literal_value |=
278 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
279 }
280
281 if (last_dma) {
282 literal_entry->literal_value |=
283 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
284 }
285
286 return instruction;
287 }
288
/* Constant and Temporary register allocation
 * - reserve space for a 32-bit register or a 64-bit register
 * - returned indices are offsets to 32-bit register locations
 * - 64-bit registers need to be aligned to even indices.
 */
#define RESERVE_32BIT 1U
#define RESERVE_64BIT 2U

/* Allocation front-ends. The 'name' argument is only forwarded when
 * MESA_DEBUG is set; otherwise NULL is passed instead.
 *
 * Both variants expand to a plain call expression without a trailing
 * semicolon, so they can be used inside larger expressions and in front of
 * an 'else'.
 */
#if MESA_DEBUG
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, name)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
#else
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, NULL)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
#endif
306
307 static uint32_t
pvr_find_constant2(uint8_t * const_usage,uint8_t words,const char * const_name)308 pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
309 {
310 uint32_t const_index = ~0;
311 uint32_t step = words;
312 uint8_t mask = (1 << words) - 1;
313
314 assert(words == 1 || words == 2);
315
316 /* Find a register at 'step' alignment that satisfies the mask. */
317 for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
318 for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
319 if ((const_usage[i] & (mask << b)) != 0)
320 continue;
321 const_usage[i] |= (mask << b);
322 const_index = i * 8 + b;
323 pvr_debug_pds_const(const_index, words * 32, const_name);
324 return const_index;
325 }
326 }
327
328 unreachable("Unexpected: Space cannot be found for constant");
329 return ~0;
330 }
331
/* Upper bound on the number of 32-bit temps tracked by the allocator. */
#define PVR_MAX_PDS_TEMPS 32
/* Allocation state for PDS temps, updated by pvr_get_temps2(). */
struct pvr_temp_usage {
   /* Bitmask of allocated temps; bit i set means temp i is in use. */
   uint32_t temp_usage;
   /* Running count of 32-bit temps handed out so far. */
   uint8_t temp_used;
   /* Index of the highest allocated temp plus one (derived from the position
    * of the top set bit of temp_usage).
    */
   uint8_t temps_needed;
};

/* Marker value used for temp indices that have not been allocated. */
#define PVR_INVALID_TEMP UINT8_C(~0)
340
pvr_get_temps2(struct pvr_temp_usage * temps,uint8_t temps_needed,const char * temp_name)341 static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
342 uint8_t temps_needed,
343 const char *temp_name)
344 {
345 uint8_t step = temps_needed;
346 uint8_t mask = (1 << temps_needed) - 1;
347
348 assert(temps_needed == 1 || temps_needed == 2);
349 assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
350
351 for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
352 if ((temps->temp_usage & (mask << i)) != 0)
353 continue;
354
355 const size_t clzBits = 8 * sizeof(unsigned int);
356
357 temps->temp_usage |= (mask << i);
358 temps->temp_used += temps_needed;
359 temps->temps_needed =
360 clzBits - __builtin_clz((unsigned int)temps->temp_usage);
361
362 pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
363
364 return i;
365 }
366
367 unreachable("Unexpected: Space cannot be found for temps");
368 return PVR_INVALID_TEMP;
369 }
370
/**
 * Wrapper macro to add a toggle for "data mode", allowing us to calculate the
 * size of a PDS program without actually attempting to store it.
 *
 * The macro expands to a single statement (do/while(0)), so it must be
 * followed by a semicolon and is safe to use as the body of an if/else.
 * In store mode \p statement is evaluated exactly once, even when
 * PVR_PDS_PRINT_INST() uses its argument; in count mode it is not evaluated
 * at all.
 *
 * \param dest The array/memory pointer where the PDS program should be stored.
 *             If the given code is NULL, automatically switch to count mode
 *             instead of attempting to fill in unallocated memory.
 * \param counter The local counter that holds the total instruction count.
 *                It is incremented by one in both modes.
 * \param statement The function call/value to be stored at dest[counter] when
 *                  dest is not NULL.
 */

#define PVR_PDS_MODE_TOGGLE(dest, counter, statement)             \
   do {                                                           \
      if (!(dest)) {                                              \
         (counter)++;                                             \
      } else {                                                    \
         const uint32_t pvr_pds_mode_toggle_inst_ = (statement);  \
         (dest)[(counter)++] = pvr_pds_mode_toggle_inst_;         \
         PVR_PDS_PRINT_INST(pvr_pds_mode_toggle_inst_);           \
      }                                                           \
   } while (0)
390
391 /**
392 * Generates the PDS vertex primary program for the dma's listed in the input
393 * structure. Produces the constant map for the Vulkan driver based upon the
394 * requirements of the instructions added to the program.
395 *
396 * PDS Data Layout
397 * ---------------
398 *
399 * The PDS data is optimized for the DDMAD layout, with the data for those
400 * instructions laid out first. The data required for other instructions is laid
401 * out in the entries unused by the DDMADs.
402 *
403 * DDMAD layout
404 * \verbatim
405 * bank | index | usage
406 * 0 | 0:1 | temps (current index)[-]
407 * 2 | 2:3 | stride[32]
408 * 1 | 4:5 | base address[64]
409 * 3 | 6:7 | ctrl[64]
410 * \endverbatim
411 *
412 * Each DMA whose stride > 0 requires one entry, laid out as above. We stride
413 * over the banks to ensure that each ddmad reads each of its operands from a
414 * different bank (i.e. remove bank clashes)
415 *
416 * Note: This is "wasting" const[0:1] and const[2], however these free
417 * registers will be used by other, non-ddmad instructions.
418 *
 * The const register usage is maintained in the const_usage array, one byte
 * per block of 8 constants. The DDMAD instructions, for example, utilize the
 * top 5 registers in each block of 8, hence a 'usage mask' of 0xF8
 * (0b11111000).
422 *
423 * Constant Map
424 * ------------
425 *
426 * The constant map is built up as we add PDS instructions and passed back
427 * for the driver to fill in the PDS data section with the correct parameters
428 * for each draw call.
429 *
430 * \param input_program PDS Program description.
431 * \param code Buffer to be filled in with the PDS program. If NULL is provided,
432 * automatically switch to count-mode, preventing writes to
433 * unallocated memory.
434 * \param info PDS info structure filled in for the driver, contains the
435 * constant map.
436 * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
437 * \param dev_info pvr device information struct.
438 */
pvr_pds_generate_vertex_primary_program(struct pvr_pds_vertex_primary_program_input * input_program,uint32_t * code,struct pvr_pds_info * info,bool use_robust_vertex_fetch,const struct pvr_device_info * dev_info)439 void pvr_pds_generate_vertex_primary_program(
440 struct pvr_pds_vertex_primary_program_input *input_program,
441 uint32_t *code,
442 struct pvr_pds_info *info,
443 bool use_robust_vertex_fetch,
444 const struct pvr_device_info *dev_info)
445 {
446 struct pvr_pds_const_map_entry_write_state entry_write_state;
447 struct pvr_const_map_entry_doutu_address *doutu_address_entry;
448
449 uint32_t instruction = 0; /* index into code */
450 uint32_t index; /* index used for current attribute, either vertex or
451 * instance.
452 */
453
454 uint32_t total_dma_count = 0;
455 uint32_t running_dma_count = 0;
456
457 uint32_t write_instance_control = ~0;
458 uint32_t write_vertex_control = ~0;
459 uint32_t write_base_instance_control = ~0;
460 uint32_t write_base_vertex_control = ~0;
461 uint32_t pvr_write_draw_index_control = ~0;
462
463 uint32_t ddmad_count = 0;
464 uint32_t doutw_count = 0;
465
466 uint32_t base_instance = 0;
467 uint32_t base_vertex = 0;
468 uint32_t draw_index = 0;
469
470 uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
471
472 struct pvr_temp_usage temp_usage = { 0 };
473
474 uint32_t zero_temp = PVR_INVALID_TEMP;
475
476 uint32_t max_index_temp = PVR_INVALID_TEMP;
477 uint32_t current_index_temp = PVR_INVALID_TEMP;
478
479 uint32_t index_id_temp = PVR_INVALID_TEMP;
480 uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
481 uint32_t instance_ID_temp = PVR_INVALID_TEMP;
482
483 /* Debug tracing of program flags. */
484 pvr_debug("pvr_pds_generate_vertex_primary_program");
485 pvr_debug("=================================================");
486 pvr_debug_pds_flag(input_program->flags,
487 PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
488 pvr_debug_pds_flag(input_program->flags,
489 PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
490 pvr_debug_pds_flag(input_program->flags,
491 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
492 pvr_debug_pds_flag(input_program->flags,
493 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
494 pvr_debug_pds_flag(input_program->flags,
495 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
496 pvr_debug_pds_flag(input_program->flags,
497 PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
498 pvr_debug_pds_flag(input_program->flags,
499 PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
500 pvr_debug(" ");
501
502 pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
503
504 /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
505 * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
506 */
507 info->data_size_in_dwords = 4;
508
509 /* Reserve 2 temps - these are automatically filled in by the VDM
510 *
511 * For instanced draw calls we manually increment the instance id by the
512 * base-instance offset which is either provided as a constant, or in a
513 * ptemp (for draw indirect)
514 *
515 * temp - contents
516 * ---------------
517 * 0 - index id (pre-filled)
518 * 1 - base instance + instance id
519 */
520 index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
521 instance_ID_temp =
522 pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
523
524 /* Reserve the lowest 2 dwords for DOUTU.
525 * [------XX]
526 */
527 const_usage[0] = 0x03;
528
529 /* Reserve consts for all the DDMAD's. */
530 for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
531 /* Mark the consts required by this ddmad "in-use".
532 * [XXXXX---]
533 */
534 const_usage[ddmad_count++] |= 0xf8;
535 }
536
537 /* Start off by assuming we can fit everything in the 8 dwords/ddmad
538 * footprint, if any DOUTD/DOUTW falls outside we will increase this
539 * counter.
540 */
541 if (ddmad_count)
542 info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
543
544 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
545 doutw_count++;
546 write_vertex_control =
547 pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
548 }
549
550 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
551 doutw_count++;
552 write_instance_control = pvr_find_constant(const_usage,
553 RESERVE_32BIT,
554 "Instance id DOUTW Ctrl");
555 }
556
557 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
558 doutw_count++;
559 write_base_instance_control =
560 pvr_find_constant(const_usage,
561 RESERVE_32BIT,
562 "Base Instance DOUTW Ctrl");
563 }
564
565 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
566 doutw_count++;
567 write_base_vertex_control = pvr_find_constant(const_usage,
568 RESERVE_32BIT,
569 "Base Vertex DOUTW Ctrl");
570
571 /* Load base vertex from constant for non-indirect variants. */
572 if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
573 0) {
574 struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
575 (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
576
577 base_vertex =
578 pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
579
580 psBaseVertexEntry =
581 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
582 sizeof(*psBaseVertexEntry));
583 psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
584 psBaseVertexEntry->const_offset = base_vertex;
585 }
586 }
587
588 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
589 doutw_count++;
590 pvr_write_draw_index_control =
591 pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
592
593 /* Set draw index to 0 for non-indirect variants. */
594 if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
595 0) {
596 struct pvr_const_map_entry_literal32 *literal_entry;
597
598 draw_index =
599 pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
600
601 literal_entry =
602 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
603 sizeof(*literal_entry));
604 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
605 literal_entry->const_offset = draw_index;
606 literal_entry->literal_value = 0;
607 }
608 }
609
610 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
611 /* Load absolute instance id into uiInstanceIdTemp. */
612 PVR_PDS_MODE_TOGGLE(
613 code,
614 instruction,
615 pvr_pds_inst_encode_add32(
616 /* cc */ 0,
617 /* alum */ 0,
618 /* sna */ 0,
619 /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
620 /* src1 */ R32_T(instance_ID_temp),
621 /* dst */ R32TP_T(instance_ID_temp)));
622 } else if (input_program->flags &
623 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
624 struct pvr_const_map_entry_base_instance *base_instance_entry =
625 (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
626
627 base_instance =
628 pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
629
630 PVR_PDS_MODE_TOGGLE(code,
631 instruction,
632 pvr_pds_inst_encode_add32(
633 /* cc */ 0,
634 /* alum */ 0,
635 /* sna */ 0,
636 /* src0 */ R32_C(base_instance),
637 /* src1 */ R32_T(instance_ID_temp),
638 /* dst */ R32TP_T(instance_ID_temp)));
639
640 base_instance_entry =
641 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
642 sizeof(*base_instance_entry));
643 base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
644 base_instance_entry->const_offset = base_instance;
645 } else if (input_program->flags &
646 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
647 struct pvr_const_map_entry_base_instance *base_instance_entry =
648 (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
649
650 base_instance = pvr_find_constant(const_usage,
651 RESERVE_32BIT,
652 "base_instance (Driver Const)");
653
654 /* Base instance provided by the driver. */
655 base_instance_entry =
656 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
657 sizeof(*base_instance_entry));
658 base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
659 base_instance_entry->const_offset = base_instance;
660 }
661
662 total_dma_count = ddmad_count;
663
664 total_dma_count += doutw_count;
665
666 if (use_robust_vertex_fetch) {
667 pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
668
669 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
670 zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
671
672 /* Load 0 into instance_ID_temp. */
673 PVR_PDS_MODE_TOGGLE(code,
674 instruction,
675 pvr_pds_inst_encode_limm(0, /* cc */
676 zero_temp, /* SRC1 */
677 0, /* SRC0 */
678 0 /* GR */
679 ));
680 } else {
681 zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
682
683 max_index_temp =
684 pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
685 current_index_temp =
686 pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
687
688 PVR_PDS_MODE_TOGGLE(code,
689 instruction,
690 pvr_pds_inst_encode_sftlp64(
691 0, /* cc */
692 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
693 1, /* IM */
694 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
695 */
696 R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
697 */
698 0, /* SRC2 (REGS32) */
699 R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
700 ));
701 PVR_PDS_MODE_TOGGLE(code,
702 instruction,
703 pvr_pds_inst_encode_sftlp64(
704 0, /* cc */
705 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
706 1, /* IM */
707 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
708 */
709 0, /* SRC1 (REGS64TP) */
710 0, /* SRC2 (REGS32) */
711 R64TP_T(current_index_temp >> 1) /* DST */
712 /* (REG64TP) */
713 ));
714 PVR_PDS_MODE_TOGGLE(code,
715 instruction,
716 pvr_pds_inst_encode_sftlp64(
717 0, /* cc */
718 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
719 1, /* IM */
720 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
721 */
722 0, /* SRC1 (REGS64TP) */
723 0, /* SRC2 (REGS32) */
724 R64TP_T(max_index_temp >> 1) /* DST */
725 /* (REG64TP) */
726 ));
727 }
728 }
729
730 if (input_program->dma_count && use_robust_vertex_fetch) {
731 PVR_PDS_MODE_TOGGLE(
732 code,
733 instruction,
734 pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
735 0, /* Neg */
736 PVR_HAS_FEATURE(dev_info, pds_ddmadt)
737 ? PVR_ROGUE_PDSINST_PREDICATE_OOB
738 : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
739 1 /* Addr */
740 ));
741 }
742
743 for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
744 uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
745 uint32_t control_word;
746 struct pvr_const_map_entry_literal32 *literal_entry;
747
748 const struct pvr_pds_vertex_dma *vertex_dma =
749 &input_program->dma_list[dma];
750 bool last_dma = (++running_dma_count == total_dma_count);
751
752 pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_dma);
753
754 /* The id we use to index into this dma. */
755 if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
756 pvr_debug_pds_note("Instance Rate (divisor = %d)",
757 vertex_dma->divisor);
758
759 /* 4 - madd 0 - needs to be 64-bit aligned
760 * 5 - madd 1
761 */
762 if (vertex_dma->divisor > 1) {
763 const uint32_t adjusted_instance_ID_temp =
764 pvr_get_temps(&temp_usage,
765 RESERVE_64BIT,
766 "adjusted_instance_ID_temp");
767 const uint32_t MADD_temp =
768 pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
769
770 /* 1. Remove base instance value from temp 1 to get instance id
771 * 2. Divide the instance id by the divisor - Iout = (Iin *
772 * Multiplier) >> (shift+31)
773 * 3. Add the base instance back on.
774 *
775 * Need two zero temps for the add part of the later MAD.
776 */
777
778 PVR_PDS_MODE_TOGGLE(code,
779 instruction,
780 pvr_pds_inst_encode_add64(
781 /* cc */ 0,
782 /* alum */ 0,
783 /* sna */ 1,
784 /* src0 */ R64_T(MADD_temp >> 1),
785 /* src1 */ R64_T(MADD_temp >> 1),
786 /* dst */ R64TP_T(MADD_temp >> 1)));
787
788 if (input_program->flags &
789 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
790 /* Subtract base instance from temp 1, put into
791 * adjusted_instance_ID_temp.
792 */
793 PVR_PDS_MODE_TOGGLE(
794 code,
795 instruction,
796 pvr_pds_inst_encode_add32(
797 /* cc */ 0,
798 /* alum */ 0,
799 /* sna */ 1,
800 /* src0 */ R32_T(instance_ID_temp),
801 /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
802 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
803 } else if (input_program->flags &
804 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
805 /* Subtract base instance from temp 1, put into
806 * adjusted_instance_ID_temp.
807 */
808 PVR_PDS_MODE_TOGGLE(
809 code,
810 instruction,
811 pvr_pds_inst_encode_add32(
812 /* cc */ 0,
813 /* alum */ 0,
814 /* sna */ 1,
815 /* src0 */ R32_T(instance_ID_temp),
816 /* src1 */ R32_C(base_instance),
817 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
818 } else {
819 /* Copy instance from temp 1 to adjusted_instance_ID_temp.
820 */
821 PVR_PDS_MODE_TOGGLE(
822 code,
823 instruction,
824 pvr_pds_inst_encode_add32(
825 /* cc */ 0,
826 /* alum */ 0,
827 /* sna */ 0,
828 /* src0 */ R32_T(instance_ID_temp),
829 /* src1 */ R32_T(MADD_temp), /* MADD_temp is set
830 * to 0 at this point.
831 */
832 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
833 }
834
835 /* shift = the bit of the next highest power of two. */
836 uint32_t shift_unsigned =
837 (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
838 int32_t shift = (int32_t)shift_unsigned;
839 uint32_t shift_2s_comp;
840
841 pvr_debug_pds_note(
842 "Perform instance rate divide (as integer multiply and rshift)");
843
844 const uint32_t multipier_constant =
845 pvr_find_constant(const_usage,
846 RESERVE_32BIT,
847 "MultiplierConstant (for InstanceDivisor)");
848
849 /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
850 note: the division above is integer division. */
851 uint64_t multipier64 =
852 (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
853 ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
854 (uint64_t)vertex_dma->divisor);
855 uint32_t multiplier = (uint32_t)multipier64;
856
857 pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
858 multiplier);
859 pvr_debug_pds_note(" - Value of Shift = %d", shift);
860
861 literal_entry =
862 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
863 sizeof(*literal_entry));
864 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
865 literal_entry->const_offset = multipier_constant;
866 literal_entry->literal_value = multiplier;
867
868 /* (Iin * Multiplier) */
869 PVR_PDS_MODE_TOGGLE(
870 code,
871 instruction,
872 pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
873 0, /* Unsigned ALU mode */
874 0, /* Unconditional */
875 R32_C(multipier_constant),
876 R32_T(adjusted_instance_ID_temp),
877 R64_T(MADD_temp / 2),
878 R64TP_T(MADD_temp / 2)));
879
880 /* >> (shift + 31) */
881 shift += 31;
882 shift *= -1;
883
884 if (shift < -31) {
885 /* >> (31) */
886 shift_2s_comp = 0xFFFE1;
887 PVR_PDS_MODE_TOGGLE(code,
888 instruction,
889 pvr_pds_inst_encode_sftlp64(
890 /* cc */ 0,
891 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
892 /* IM */ 1, /* enable immediate */
893 /* SRC0 */ R64_T(MADD_temp / 2),
894 /* SRC1 */ 0, /* This won't be used
895 in a shift
896 operation. */
897 /* SRC2 (Shift) */ shift_2s_comp,
898 /* DST */ R64TP_T(MADD_temp / 2)));
899 shift += 31;
900 }
901
902 /* >> (shift + 31) */
903 shift_2s_comp = *((uint32_t *)&shift);
904 PVR_PDS_MODE_TOGGLE(code,
905 instruction,
906 pvr_pds_inst_encode_sftlp64(
907 /* cc */ 0,
908 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
909 /* IM */ 1, /* enable immediate */
910 /* SRC0 */ R64_T(MADD_temp / 2),
911 /* SRC1 */ 0, /* This won't be used
912 * in a shift
913 * operation. */
914 /* SRC2 (Shift) */ shift_2s_comp,
915 /* DST */ R64TP_T(MADD_temp / 2)));
916
917 if (input_program->flags &
918 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
919 /* Add base instance. */
920 PVR_PDS_MODE_TOGGLE(
921 code,
922 instruction,
923 pvr_pds_inst_encode_add32(
924 /* cc */ 0,
925 /* alum */ 0,
926 /* sna */ 0,
927 /* src0 */ R32_T(MADD_temp),
928 /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
929 /* dst */ R32TP_T(MADD_temp)));
930 } else if (input_program->flags &
931 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
932 /* Add base instance. */
933 PVR_PDS_MODE_TOGGLE(code,
934 instruction,
935 pvr_pds_inst_encode_add32(
936 /* cc */ 0,
937 /* alum */ 0,
938 /* sna */ 0,
939 /* src0 */ R32_T(MADD_temp),
940 /* src1 */ R32_C(base_instance),
941 /* dst */ R32TP_T(MADD_temp)));
942 }
943
944 pvr_debug_pds_note(
945 "DMA Vertex Index will be sourced from 'MADD_temp'");
946 index = MADD_temp;
947 } else if (vertex_dma->divisor == 0) {
948 if (base_instance_ID_temp == PVR_INVALID_TEMP) {
949 base_instance_ID_temp = pvr_get_temps(&temp_usage,
950 RESERVE_32BIT,
951 "uBaseInstanceIDTemp");
952 }
953
954 /* Load 0 into instance_ID_temp. */
955 PVR_PDS_MODE_TOGGLE(code,
956 instruction,
957 pvr_pds_inst_encode_limm(
958 /* cc */ 0,
959 /* src1 */ base_instance_ID_temp,
960 /* src0 */ 0,
961 /* gr */ 0));
962
963 if (input_program->flags &
964 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
965 /* Add base instance. */
966 PVR_PDS_MODE_TOGGLE(
967 code,
968 instruction,
969 pvr_pds_inst_encode_add32(
970 /* cc */ 0,
971 /* alum */ 0,
972 /* sna */ 0,
973 /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
974 /* src1 */ R32_T(base_instance_ID_temp),
975 /* dst */ R32TP_T(base_instance_ID_temp)));
976
977 } else if (input_program->flags &
978 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
979 /* Add base instance. */
980 PVR_PDS_MODE_TOGGLE(
981 code,
982 instruction,
983 pvr_pds_inst_encode_add32(
984 /* cc */ 0,
985 /* alum */ 0,
986 /* sna */ 0,
987 /* src0 */ R32_C(base_instance),
988 /* src1 */ R32_T(base_instance_ID_temp),
989 /* dst */ R32TP_T(base_instance_ID_temp)));
990 }
991
992 pvr_debug_pds_note(
993 "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
994 index = base_instance_ID_temp;
995 } else {
996 pvr_debug_pds_note(
997 "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
998 index = instance_ID_temp;
999 }
1000 } else {
1001 pvr_debug_pds_note(
1002 "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
1003 index = index_id_temp;
1004 }
1005
1006 /* DDMAD Const Usage [__XX_---] */
1007 pvr_write_pds_const_map_entry_vertex_attribute_address(
1008 &entry_write_state,
1009 vertex_dma,
1010 const_base + 4,
1011 use_robust_vertex_fetch);
1012
1013 /* DDMAD Const Usage [__XXX---] */
1014 literal_entry =
1015 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1016 sizeof(*literal_entry));
1017 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1018 literal_entry->const_offset = const_base + 3;
1019 literal_entry->literal_value = vertex_dma->stride;
1020
1021 control_word = vertex_dma->size_in_dwords
1022 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1023 control_word |= vertex_dma->destination
1024 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1025 control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1026 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1027
1028 /* DDMADT instructions will do a dummy doutd when OOB if
1029 * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
1030 * would need to do another doutd after an OOB DDMADT to provide the 'in
1031 * bounds' data the DDMADT can't be set as LAST.
1032 *
1033 * This requires us to include a final dummy DDMAD.LAST instruction.
1034 *
1035 * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
1036 * Specification.doc
1037 *
1038 * DDMAD src0,src1,src2,src3
1039 *
1040 * calculated_source_address := src0*src1+src2
1041 * base_address := src2
1042 * dma_parameters := src3[31:0]
1043 * buffer_size := src3[63:33]
1044 * test := src3[32]
1045 *
1046 * if (test == 1) {
1047 * // DDMAD(T)
1048 * if (calculated_source_address[39:0] + (burst_size<<2) <=
1049 * base_address[39:0] + buffer_size) {
1050 * OOB := 0
1051 * DOUTD calculated_source_address,dma_paramters
1052 * } else {
1053 * OOB := 1
1054 * if (last_instance == 1) {
1055 * dma_parameters[BURST_SIZE] := 0
1056 * DOUTD calculated_source_address,dma_paramters
1057 * }
1058 * }
1059 * } else {
1060 * // DDMAD
1061 * DOUTD calculated_source_address,dma_paramters
1062 * }
1063 */
1064
1065 if (last_dma && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
1066 !use_robust_vertex_fetch)) {
1067 pvr_debug_pds_note("LAST DDMAD");
1068 control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1069 }
1070
1071 /* DDMAD Const Usage [_XXXX---] */
1072 literal_entry =
1073 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1074 sizeof(*literal_entry));
1075 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1076 literal_entry->const_offset = (const_base + 6);
1077 literal_entry->literal_value = control_word;
1078
1079 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1080 /* DDMAD Const Usage [XXXXX---]
1081 * With DDMADT an extra 32bits of SRC3 contains the information for
1082 * performing out-of-bounds tests on the DMA.
1083 */
1084
1085 if (use_robust_vertex_fetch) {
1086 struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
1087 *obb_buffer_size;
1088 obb_buffer_size =
1089 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1090 sizeof(*obb_buffer_size));
1091
1092 obb_buffer_size->type =
1093 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
1094 obb_buffer_size->const_offset = const_base + 7;
1095 obb_buffer_size->binding_index = vertex_dma->binding_index;
1096 } else {
1097 literal_entry =
1098 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1099 sizeof(*literal_entry));
1100 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1101 literal_entry->const_offset = const_base + 7;
1102 literal_entry->literal_value = 0;
1103 }
1104
1105 PVR_PDS_MODE_TOGGLE(
1106 code,
1107 instruction,
1108 pvr_pds_inst_encode_ddmad(0, /* cc */
1109 0, /* END */
1110 R32_C(const_base + 3), /* SRC0 (REGS32) */
1111 index, /* SRC1 (REGS32T) */
1112 R64_C((const_base + 4) >> 1), /* SRC2
1113 * (REGS64)
1114 */
1115 R64_C((const_base + 6) >> 1) /* SRC3
1116 * (REGS64C)
1117 */
1118 ));
1119
1120 if (use_robust_vertex_fetch) {
1121 /* If not out of bounds, skip next DDMAD instructions. */
1122 PVR_PDS_MODE_TOGGLE(code,
1123 instruction,
1124 pvr_pds_inst_encode_ddmad(
1125 1, /* cc */
1126 0, /* END */
1127 R32_C(const_base + 3), /* SRC0 (REGS32) */
1128 R32_T(zero_temp), /* SRC1 (REGS32T) */
1129 R64_C((const_base + 4) >> 1), /* SRC2
1130 * (REGS64)
1131 */
1132 R64_C((const_base + 6) >> 1) /* SRC3
1133 * (REGS64C)
1134 */
1135 ));
1136
1137 /* Now the driver must have a dummy DDMAD marked as last. */
1138 if (last_dma) {
1139 uint32_t dummy_dma_const = pvr_find_constant(const_usage,
1140 RESERVE_64BIT,
1141 "uDummyDMAConst");
1142 uint32_t zero_const =
1143 pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
1144
1145 literal_entry =
1146 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1147 sizeof(*literal_entry));
1148 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1149 literal_entry->const_offset = zero_const;
1150 literal_entry->literal_value = 0;
1151
1152 literal_entry =
1153 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1154 sizeof(*literal_entry));
1155 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1156 literal_entry->const_offset = zero_const + 1;
1157 literal_entry->literal_value = 0;
1158
1159 literal_entry =
1160 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1161 sizeof(*literal_entry));
1162 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1163 literal_entry->const_offset = dummy_dma_const;
1164 literal_entry->literal_value = 0;
1165
1166 literal_entry->literal_value |=
1167 0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1168 literal_entry->literal_value |=
1169 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1170 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1171 literal_entry->literal_value |=
1172 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1173
1174 literal_entry =
1175 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1176 sizeof(*literal_entry));
1177 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1178 literal_entry->const_offset = dummy_dma_const + 1;
1179 literal_entry->literal_value = 0;
1180
1181 PVR_PDS_MODE_TOGGLE(code,
1182 instruction,
1183 pvr_pds_inst_encode_ddmad(
1184 0, /* cc */
1185 0, /* END */
1186 R32_C(zero_const), /* SRC0 (REGS32)
1187 */
1188 R32_T(zero_temp), /* SRC1 (REGS32T)
1189 */
1190 R64_C((dummy_dma_const) >> 1), /* SRC2
1191 (REGS64)
1192 */
1193 R64_C((dummy_dma_const) >> 1) /* SRC3
1194 (REGS64C)
1195 */
1196 ));
1197 }
1198 }
1199 } else {
1200 if (use_robust_vertex_fetch) {
1201 struct pvr_const_map_entry_vertex_attribute_max_index
1202 *max_index_entry;
1203
1204 pvr_debug("RobustVertexFetch DDMAD");
1205
1206 const uint32_t max_index_const =
1207 pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
1208
1209 max_index_entry =
1210 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1211 sizeof(*max_index_entry));
1212 max_index_entry->const_offset = max_index_const;
1213 max_index_entry->type =
1214 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
1215 max_index_entry->binding_index = vertex_dma->binding_index;
1216 max_index_entry->offset = vertex_dma->offset;
1217 max_index_entry->stride = vertex_dma->stride;
1218 max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
1219 max_index_entry->component_size_in_bytes =
1220 vertex_dma->component_size_in_bytes;
1221
1222 PVR_PDS_MODE_TOGGLE(
1223 code,
1224 instruction,
1225 pvr_pds_inst_encode_add32(0, /* cc */
1226 0, /* ALUM */
1227 PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
1228 R32_C(max_index_const), /* SRC0
1229 * (REGS32)
1230 */
1231 R32_T(zero_temp), /* SRC1 (REGS32) */
1232 R32TP_T(max_index_temp) /* DST
1233 * (REG32TP)
1234 */
1235 ));
1236
1237 PVR_PDS_MODE_TOGGLE(code,
1238 instruction,
1239 pvr_pds_inst_encode_sftlp32(
1240 1, /* IM */
1241 0, /* cc */
1242 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1243 index, /* SRC0 (REGS32T) */
1244 0, /* SRC1 (REGS32) */
1245 0, /* SRC2 (REG32TP) */
1246 R32TP_T(current_index_temp) /* DST
1247 * (REG32TP)
1248 */
1249 ));
1250
1251 PVR_PDS_MODE_TOGGLE(
1252 code,
1253 instruction,
1254 pvr_pds_inst_encode_cmp(
1255 0, /* cc enable */
1256 PVR_ROGUE_PDSINST_COP_GT, /* Operation */
1257 R64TP_T(current_index_temp >> 1), /* SRC
1258 * (REGS64TP)
1259 */
1260 R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
1261 ));
1262
1263 PVR_PDS_MODE_TOGGLE(code,
1264 instruction,
1265 pvr_pds_inst_encode_sftlp32(
1266 1, /* IM */
1267 1, /* cc */
1268 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1269 zero_temp, /* SRC0 (REGS32T) */
1270 0, /* SRC1 (REGS32) */
1271 0, /* SRC2 (REG32TP) */
1272 R32TP_T(current_index_temp) /* DST
1273 * (REG32TP)
1274 */
1275 ));
1276
1277 PVR_PDS_MODE_TOGGLE(code,
1278 instruction,
1279 pvr_pds_inst_encode_ddmad(
1280 0, /* cc */
1281 0, /* END */
1282 R32_C(const_base + 3), /* SRC0 (REGS32) */
1283 current_index_temp, /* SRC1 (REGS32T) */
1284 R64_C((const_base + 4) >> 1), /* SRC2
1285 * (REGS64)
1286 */
1287 (const_base + 6) >> 1 /* SRC3 (REGS64C) */
1288 ));
1289 } else {
1290 PVR_PDS_MODE_TOGGLE(code,
1291 instruction,
1292 pvr_pds_inst_encode_ddmad(
1293 /* cc */ 0,
1294 /* end */ 0,
1295 /* src0 */ R32_C(const_base + 3),
1296 /* src1 */ (index),
1297 /* src2 */ R64_C((const_base + 4) >> 1),
1298 /* src3 */ (const_base + 6) >> 1));
1299 }
1300 }
1301 }
1302
1303 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
1304 bool last_dma = (++running_dma_count == total_dma_count);
1305
1306 PVR_PDS_MODE_TOGGLE(
1307 code,
1308 instruction,
1309 pvr_encode_direct_write(
1310 &entry_write_state,
1311 last_dma,
1312 false,
1313 R64_C(write_vertex_control),
1314 R64_T(0),
1315 0x1,
1316 input_program->vertex_id_register,
1317 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1318 dev_info));
1319 }
1320
1321 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
1322 bool last_dma = (++running_dma_count == total_dma_count);
1323
1324 PVR_PDS_MODE_TOGGLE(
1325 code,
1326 instruction,
1327 pvr_encode_direct_write(
1328 &entry_write_state,
1329 last_dma,
1330 false,
1331 R64_C(write_instance_control),
1332 R64_T(0),
1333 0x2,
1334 input_program->instance_id_register,
1335 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1336 dev_info));
1337 }
1338
1339 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
1340 bool last_dma = (++running_dma_count == total_dma_count);
1341
1342 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1343 /* Base instance comes from ptemp 1. */
1344 PVR_PDS_MODE_TOGGLE(
1345 code,
1346 instruction,
1347 pvr_encode_direct_write(
1348 &entry_write_state,
1349 last_dma,
1350 false,
1351 R64_C(write_base_instance_control),
1352 R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
1353 0x2,
1354 input_program->base_instance_register,
1355 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1356 dev_info));
1357 } else {
1358 uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
1359
1360 /* Base instance comes from driver constant. */
1361 PVR_PDS_MODE_TOGGLE(
1362 code,
1363 instruction,
1364 pvr_encode_direct_write(
1365 &entry_write_state,
1366 last_dma,
1367 false,
1368 R64_C(write_base_instance_control),
1369 R64_C(base_instance >> 1),
1370 data_mask,
1371 input_program->base_instance_register,
1372 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1373 dev_info));
1374 }
1375 }
1376
1377 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
1378 bool last_dma = (++running_dma_count == total_dma_count);
1379
1380 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1381 /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
1382 PVR_PDS_MODE_TOGGLE(
1383 code,
1384 instruction,
1385 pvr_encode_direct_write(
1386 &entry_write_state,
1387 last_dma,
1388 false,
1389 R64_C(write_base_vertex_control),
1390 R64_P(0),
1391 0x1,
1392 input_program->base_vertex_register,
1393 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1394 dev_info));
1395 } else {
1396 uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
1397
1398 /* Base vertex comes from driver constant (literal 0). */
1399 PVR_PDS_MODE_TOGGLE(
1400 code,
1401 instruction,
1402 pvr_encode_direct_write(
1403 &entry_write_state,
1404 last_dma,
1405 false,
1406 R64_C(write_base_vertex_control),
1407 R64_C(base_vertex >> 1),
1408 data_mask,
1409 input_program->base_vertex_register,
1410 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1411 dev_info));
1412 }
1413 }
1414
1415 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
1416 bool last_dma = (++running_dma_count == total_dma_count);
1417
1418 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1419 /* Draw index comes from ptemp 3. */
1420 PVR_PDS_MODE_TOGGLE(
1421 code,
1422 instruction,
1423 pvr_encode_direct_write(
1424 &entry_write_state,
1425 last_dma,
1426 false,
1427 R64_C(pvr_write_draw_index_control),
1428 R64_P(1),
1429 0x2,
1430 input_program->draw_index_register,
1431 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1432 dev_info));
1433 } else {
1434 uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
1435
1436 /* Draw index comes from constant (literal 0). */
1437 PVR_PDS_MODE_TOGGLE(
1438 code,
1439 instruction,
1440 pvr_encode_direct_write(
1441 &entry_write_state,
1442 last_dma,
1443 false,
1444 R64_C(pvr_write_draw_index_control),
1445 R64_C(draw_index >> 1),
1446 data_mask,
1447 input_program->draw_index_register,
1448 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1449 dev_info));
1450 }
1451 }
1452
1453 doutu_address_entry =
1454 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1455 sizeof(*doutu_address_entry));
1456 doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1457 doutu_address_entry->const_offset = 0;
1458 doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
1459
1460 if (use_robust_vertex_fetch) {
1461 /* Restore IF0 */
1462 PVR_PDS_MODE_TOGGLE(
1463 code,
1464 instruction,
1465 pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
1466 0, /* Neg */
1467 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
1468 1 /* Addr */
1469 ));
1470 }
1471
1472 PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
1473 PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
1474
1475 assert(running_dma_count == total_dma_count);
1476
1477 for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
1478 if (const_usage[i] == 0)
1479 break;
1480
1481 info->data_size_in_dwords =
1482 8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
1483 }
1484
1485 info->temps_required = temp_usage.temps_needed;
1486 info->entry_count = entry_write_state.entry_count;
1487 info->entries_written_size_in_bytes =
1488 entry_write_state.entries_size_in_bytes;
1489 info->code_size_in_dwords = instruction;
1490
1491 pvr_debug("=================================================\n");
1492 }
1493
pvr_pds_generate_descriptor_upload_program(struct pvr_pds_descriptor_program_input * input_program,uint32_t * code_section,struct pvr_pds_info * info)1494 void pvr_pds_generate_descriptor_upload_program(
1495 struct pvr_pds_descriptor_program_input *input_program,
1496 uint32_t *code_section,
1497 struct pvr_pds_info *info)
1498 {
1499 unsigned int num_consts64;
1500 unsigned int num_consts32;
1501 unsigned int next_const64;
1502 unsigned int next_const32;
1503 unsigned int instruction = 0;
1504 uint32_t compile_time_buffer_index = 0;
1505
1506 unsigned int total_dma_count = 0;
1507 unsigned int running_dma_count = 0;
1508
1509 struct pvr_pds_const_map_entry_write_state entry_write_state;
1510
1511 /* Calculate the total register usage so we can stick 32-bit consts
1512 * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
1513 * constant.
1514 */
1515 num_consts32 = input_program->descriptor_set_count;
1516 num_consts64 = input_program->descriptor_set_count;
1517 total_dma_count = input_program->descriptor_set_count;
1518
1519 /* 1 DOUTD for buffer containing address literals. */
1520 if (input_program->addr_literal_count > 0) {
1521 num_consts32++;
1522 num_consts64++;
1523 total_dma_count++;
1524 }
1525
1526 pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
1527
1528 for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1529 struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1530
1531 /* This switch statement looks pointless but we want to optimize DMAs
1532 * that can be done as a DOUTW.
1533 */
1534 switch (buffer->type) {
1535 default: {
1536 /* 1 DOUTD per compile time buffer: */
1537 num_consts32++;
1538 num_consts64++;
1539 total_dma_count++;
1540 break;
1541 }
1542 }
1543 }
1544
1545 /* DOUTU for the secondary update program requires a 64-bit constant. */
1546 if (input_program->secondary_program_present)
1547 num_consts64++;
1548
1549 info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
1550
1551 /* Start counting constants. */
1552 next_const64 = 0;
1553 next_const32 = num_consts64 * 2;
1554
1555 if (input_program->addr_literal_count > 0) {
1556 bool last_dma = (++running_dma_count == total_dma_count);
1557 bool halt = last_dma && !input_program->secondary_program_present;
1558
1559 unsigned int size_in_dwords = input_program->addr_literal_count *
1560 sizeof(uint64_t) / sizeof(uint32_t);
1561 unsigned int destination = input_program->addr_literals[0].destination;
1562
1563 struct pvr_pds_const_map_entry_addr_literal_buffer
1564 *addr_literal_buffer_entry;
1565
1566 addr_literal_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1567 &entry_write_state,
1568 sizeof(*addr_literal_buffer_entry));
1569
1570 addr_literal_buffer_entry->type =
1571 PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL_BUFFER;
1572 addr_literal_buffer_entry->size = PVR_DW_TO_BYTES(size_in_dwords);
1573 addr_literal_buffer_entry->const_offset = next_const64 * 2;
1574
1575 for (unsigned int i = 0; i < input_program->addr_literal_count; i++) {
1576 struct pvr_pds_const_map_entry_addr_literal *addr_literal_entry;
1577
1578 /* Check that the destinations for the addr literals are contiguous.
1579 * Not supporting non contiguous ranges as that would either require a
1580 * single large buffer with wasted memory for DMA, or multiple buffers
1581 * to DMA.
1582 */
1583 if (i > 0) {
1584 const uint32_t current_addr_literal_destination =
1585 input_program->addr_literals[i].destination;
1586 const uint32_t previous_addr_literal_destination =
1587 input_program->addr_literals[i - 1].destination;
1588
1589 /* 2 regs to store 64 bits address. */
1590 assert(current_addr_literal_destination ==
1591 previous_addr_literal_destination + 2);
1592 }
1593
1594 addr_literal_entry =
1595 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1596 sizeof(*addr_literal_entry));
1597
1598 addr_literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL;
1599 addr_literal_entry->addr_type = input_program->addr_literals[i].type;
1600 }
1601
1602 PVR_PDS_MODE_TOGGLE(code_section,
1603 instruction,
1604 pvr_encode_burst_cs(&entry_write_state,
1605 last_dma,
1606 halt,
1607 next_const32,
1608 next_const64,
1609 size_in_dwords,
1610 destination));
1611
1612 next_const64++;
1613 next_const32++;
1614 }
1615
1616 /* For each descriptor set perform a DOUTD. */
1617 for (unsigned int descriptor_index = 0;
1618 descriptor_index < input_program->descriptor_set_count;
1619 descriptor_index++) {
1620 struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
1621 struct pvr_pds_descriptor_set *descriptor_set =
1622 &input_program->descriptor_sets[descriptor_index];
1623
1624 bool last_dma = (++running_dma_count == total_dma_count);
1625 bool halt = last_dma && !input_program->secondary_program_present;
1626
1627 descriptor_set_entry =
1628 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1629 sizeof(*descriptor_set_entry));
1630 descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
1631 descriptor_set_entry->const_offset = next_const64 * 2;
1632 descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
1633 descriptor_set_entry->primary = descriptor_set->primary;
1634 descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
1635
1636 PVR_PDS_MODE_TOGGLE(code_section,
1637 instruction,
1638 pvr_encode_burst_cs(&entry_write_state,
1639 last_dma,
1640 halt,
1641 next_const32,
1642 next_const64,
1643 descriptor_set->size_in_dwords,
1644 descriptor_set->destination));
1645
1646 next_const64++;
1647 next_const32++;
1648 }
1649
1650 for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1651 struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1652
1653 bool last_dma = (++running_dma_count == total_dma_count);
1654 bool halt = last_dma && !input_program->secondary_program_present;
1655
1656 switch (buffer->type) {
1657 case PVR_BUFFER_TYPE_PUSH_CONSTS: {
1658 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1659
1660 special_buffer_entry =
1661 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1662 sizeof(*special_buffer_entry));
1663 special_buffer_entry->type =
1664 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1665 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
1666 special_buffer_entry->buffer_index = buffer->source_offset;
1667 break;
1668 }
1669 case PVR_BUFFER_TYPE_DYNAMIC: {
1670 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1671
1672 special_buffer_entry =
1673 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1674 sizeof(*special_buffer_entry));
1675 special_buffer_entry->type =
1676 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1677 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
1678 special_buffer_entry->buffer_index = buffer->source_offset;
1679 break;
1680 }
1681 case PVR_BUFFER_TYPE_COMPILE_TIME: {
1682 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1683
1684 special_buffer_entry =
1685 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1686 sizeof(*special_buffer_entry));
1687 special_buffer_entry->type =
1688 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1689 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_COMPILE_TIME;
1690 special_buffer_entry->buffer_index = compile_time_buffer_index++;
1691 break;
1692 }
1693 case PVR_BUFFER_TYPE_BUFFER_LENGTHS: {
1694 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1695
1696 special_buffer_entry =
1697 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1698 sizeof(*special_buffer_entry));
1699 special_buffer_entry->type =
1700 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1701 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BUFFER_LENGTHS;
1702 break;
1703 }
1704 case PVR_BUFFER_TYPE_BLEND_CONSTS: {
1705 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1706
1707 special_buffer_entry =
1708 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1709 sizeof(*special_buffer_entry));
1710 special_buffer_entry->type =
1711 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1712 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
1713 special_buffer_entry->buffer_index =
1714 input_program->blend_constants_used_mask;
1715 break;
1716 }
1717 case PVR_BUFFER_TYPE_UBO: {
1718 struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
1719
1720 constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1721 &entry_write_state,
1722 sizeof(*constant_buffer_entry));
1723 constant_buffer_entry->type =
1724 PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
1725 constant_buffer_entry->buffer_id = buffer->buffer_id;
1726 constant_buffer_entry->desc_set = buffer->desc_set;
1727 constant_buffer_entry->binding = buffer->binding;
1728 constant_buffer_entry->offset = buffer->source_offset;
1729 constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1730 break;
1731 }
1732 case PVR_BUFFER_TYPE_UBO_ZEROING: {
1733 struct pvr_const_map_entry_constant_buffer_zeroing
1734 *constant_buffer_entry;
1735
1736 constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1737 &entry_write_state,
1738 sizeof(*constant_buffer_entry));
1739 constant_buffer_entry->type =
1740 PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
1741 constant_buffer_entry->buffer_id = buffer->buffer_id;
1742 constant_buffer_entry->offset = buffer->source_offset;
1743 constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1744 break;
1745 }
1746 }
1747
1748 entry_write_state.entry->const_offset = next_const64 * 2;
1749
1750 PVR_PDS_MODE_TOGGLE(code_section,
1751 instruction,
1752 pvr_encode_burst_cs(&entry_write_state,
1753 last_dma,
1754 halt,
1755 next_const32,
1756 next_const64,
1757 buffer->size_in_dwords,
1758 buffer->destination));
1759
1760 next_const64++;
1761 next_const32++;
1762 }
1763
1764 if (total_dma_count != running_dma_count)
1765 fprintf(stderr, "Mismatch in DMA count\n");
1766
1767 if (input_program->secondary_program_present) {
1768 struct pvr_const_map_entry_doutu_address *doutu_address;
1769
1770 PVR_PDS_MODE_TOGGLE(code_section,
1771 instruction,
1772 pvr_pds_encode_doutu(false, true, next_const64));
1773
1774 doutu_address =
1775 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1776 sizeof(*doutu_address));
1777 doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1778 doutu_address->const_offset = next_const64 * 2;
1779 doutu_address->doutu_control = input_program->secondary_task_control.src0;
1780
1781 next_const64++;
1782 }
1783
1784 if (instruction == 0 && input_program->must_not_be_empty) {
1785 PVR_PDS_MODE_TOGGLE(code_section,
1786 instruction,
1787 pvr_pds_inst_encode_halt(
1788 /* cc */ false));
1789 }
1790
1791 info->entry_count = entry_write_state.entry_count;
1792 info->entries_written_size_in_bytes =
1793 entry_write_state.entries_size_in_bytes;
1794 info->code_size_in_dwords = instruction;
1795 }
1796