xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pds/pvr_pipeline_pds.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "pvr_types.h"
36 #include "util/log.h"
37 #include "util/macros.h"
38 
/* Operand helpers: each macro offsets a register index by the matching
 * PVR_ROGUE_PDSINST_*_LOWER base of the named operand encoding.
 */

/* REGS32 operands: const, temp and ptemp. */
#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)

/* REGS32TP operands: temp and ptemp only. */
#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)

/* REGS64 operands: const, temp and ptemp. */
#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)

/* REGS64TP operands: temp and ptemp only. */
#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)
52 
53 /* 32-bit PTemp index for draw indirect base instance. */
54 #define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U
55 
56 /* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */
57 #define PVR_PDS_DDMAD_NUM_CONSTS 8
58 
#if defined(TRACE_PDS)
/* Pretty-printing helpers used while generating PDS programs. They expand to
 * mesa_logd() calls when TRACE_PDS is defined and to nothing otherwise.
 */

#   define pvr_debug_pds_const(reg, size, annotation) \
      mesa_logd("const[%d]   @  (%dbits)  %s", reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation) \
      mesa_logd("temp[%d]    @  (%dbits)  %s", reg, size, annotation)
#   define pvr_debug_pds_note(...) mesa_logd("              // " __VA_ARGS__)
/* Logs the name of 'flag' when all of its bits are set in 'flags'. Wrapped in
 * do/while(0) so that the macro behaves as a single statement, including
 * inside an unbraced if/else.
 */
#   define pvr_debug_pds_flag(flags, flag)  \
      do {                                  \
         if (((flags) & (flag)) == (flag))  \
            mesa_logd(" > " #flag);         \
      } while (0)
/* The annotation is printed through "%s" so it is never interpreted as a
 * format string.
 */
#   define pvr_debug(annotation) mesa_logd("%s", annotation)

#else
#   define pvr_debug_pds_const(reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation)
#   define pvr_debug_pds_note(...)
#   define pvr_debug_pds_flag(flags, flag)
#   define pvr_debug(annotation)
#endif
81 
/* Cursor used while appending variable-sized entries to a PDS const map. */
struct pvr_pds_const_map_entry_write_state {
   /* PDS info whose entry storage is being filled in. */
   const struct pvr_pds_info *PDS_info;

   /* Most recently prepared entry (the start of the storage right after
    * initialisation).
    */
   struct pvr_const_map_entry *entry;

   /* Size of the entry 'entry' points at; 0 until the first entry has been
    * prepared.
    */
   size_t size_of_last_entry_in_bytes;

   /* Number of entries prepared so far. */
   uint32_t entry_count;

   /* Sum of the sizes of all entries prepared so far. */
   size_t entries_size_in_bytes;
};
89 
pvr_init_pds_const_map_entry_write_state(struct pvr_pds_info * PDS_info,struct pvr_pds_const_map_entry_write_state * entry_write_state)90 static void pvr_init_pds_const_map_entry_write_state(
91    struct pvr_pds_info *PDS_info,
92    struct pvr_pds_const_map_entry_write_state *entry_write_state)
93 {
94    entry_write_state->PDS_info = PDS_info;
95    entry_write_state->entry = PDS_info->entries;
96    entry_write_state->size_of_last_entry_in_bytes = 0;
97    entry_write_state->entry_count = 0;
98    entry_write_state->entries_size_in_bytes = 0;
99 }
100 
101 /* Returns a pointer to the next struct pvr_const_map_entry. */
pvr_prepare_next_pds_const_map_entry(struct pvr_pds_const_map_entry_write_state * entry_write_state,size_t size_of_next_entry_in_bytes)102 static void *pvr_prepare_next_pds_const_map_entry(
103    struct pvr_pds_const_map_entry_write_state *entry_write_state,
104    size_t size_of_next_entry_in_bytes)
105 {
106    /* Move on to the next entry. */
107    uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
108                           entry_write_state->size_of_last_entry_in_bytes);
109    entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
110 
111    entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
112    entry_write_state->entry_count++;
113    entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
114 
115    /* Check if we can write into the next entry. */
116    assert(entry_write_state->entries_size_in_bytes <=
117           entry_write_state->PDS_info->entries_size_in_bytes);
118 
119    return entry_write_state->entry;
120 }
121 
pvr_write_pds_const_map_entry_vertex_attribute_address(struct pvr_pds_const_map_entry_write_state * entry_write_state,const struct pvr_pds_vertex_dma * DMA,uint32_t const_val,bool use_robust_vertex_fetch)122 static void pvr_write_pds_const_map_entry_vertex_attribute_address(
123    struct pvr_pds_const_map_entry_write_state *entry_write_state,
124    const struct pvr_pds_vertex_dma *DMA,
125    uint32_t const_val,
126    bool use_robust_vertex_fetch)
127 {
128    pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
129                       DMA->size_in_dwords,
130                       DMA->stride,
131                       DMA->offset,
132                       DMA->binding_index);
133 
134    if (use_robust_vertex_fetch) {
135       struct pvr_const_map_entry_robust_vertex_attribute_address
136          *robust_attribute_entry;
137 
138       robust_attribute_entry =
139          pvr_prepare_next_pds_const_map_entry(entry_write_state,
140                                               sizeof(*robust_attribute_entry));
141       robust_attribute_entry->type =
142          PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
143       robust_attribute_entry->const_offset = const_val;
144       robust_attribute_entry->binding_index = DMA->binding_index;
145       robust_attribute_entry->component_size_in_bytes =
146          DMA->component_size_in_bytes;
147       robust_attribute_entry->offset = DMA->offset;
148       robust_attribute_entry->stride = DMA->stride;
149       robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
150       robust_attribute_entry->robustness_buffer_offset =
151          DMA->robustness_buffer_offset;
152    } else {
153       struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
154 
155       attribute_entry =
156          pvr_prepare_next_pds_const_map_entry(entry_write_state,
157                                               sizeof(*attribute_entry));
158       attribute_entry->type =
159          PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
160       attribute_entry->const_offset = const_val;
161       attribute_entry->binding_index = DMA->binding_index;
162       attribute_entry->offset = DMA->offset;
163       attribute_entry->stride = DMA->stride;
164       attribute_entry->size_in_dwords = DMA->size_in_dwords;
165    }
166 }
167 
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)168 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
169                                                    uint32_t end,
170                                                    uint32_t src0)
171 {
172    return pvr_pds_inst_encode_dout(cc,
173                                    end,
174                                    0,
175                                    src0,
176                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
177 }
178 
179 static uint32_t
pvr_encode_burst(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_dma,bool halt,unsigned int const32,unsigned int const64,unsigned int dma_size_in_dwords,unsigned int destination,unsigned int store)180 pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
181                  bool last_dma,
182                  bool halt,
183                  unsigned int const32,
184                  unsigned int const64,
185                  unsigned int dma_size_in_dwords,
186                  unsigned int destination,
187                  unsigned int store)
188 {
189    uint32_t literal_value;
190 
191    /* Encode literal value. */
192    literal_value = dma_size_in_dwords
193                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
194    literal_value |= destination
195                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
196    literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
197                     store;
198 
199    if (last_dma)
200       literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
201 
202    /* Create const map entry. */
203    struct pvr_const_map_entry_literal32 *literal_entry;
204 
205    literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
206                                                         sizeof(*literal_entry));
207    literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
208    literal_entry->const_offset = const32;
209    literal_entry->literal_value = literal_value;
210 
211    /* Encode DOUTD */
212    return pvr_pds_inst_encode_dout(0,
213                                    halt,
214                                    R32_C(const32),
215                                    R64_C(const64),
216                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218 
/* pvr_encode_burst() with the store argument fixed to
 * PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE.
 */
#define pvr_encode_burst_cs(write_state, is_last, do_halt, c32, c64, \
                            size_in_dwords, dest_offset)             \
   pvr_encode_burst(write_state,                                     \
                    is_last,                                         \
                    do_halt,                                         \
                    c32,                                             \
                    c64,                                             \
                    size_in_dwords,                                  \
                    dest_offset,                                     \
                    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
235 
pvr_encode_direct_write(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_dma,bool halt,unsigned int const32,unsigned int const64,uint32_t data_mask,unsigned int destination,uint32_t destination_store,const struct pvr_device_info * dev_info)236 static uint32_t pvr_encode_direct_write(
237    struct pvr_pds_const_map_entry_write_state *entry_write_state,
238    bool last_dma,
239    bool halt,
240    unsigned int const32,
241    unsigned int const64,
242    uint32_t data_mask,
243    unsigned int destination,
244    uint32_t destination_store,
245    const struct pvr_device_info *dev_info)
246 {
247    struct pvr_const_map_entry_literal32 *literal_entry;
248 
249    uint32_t instruction =
250       pvr_pds_inst_encode_dout(0,
251                                halt,
252                                const32,
253                                const64,
254                                PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
255 
256    literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
257                                                         sizeof(*literal_entry));
258    literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
259    literal_entry->const_offset = const32;
260    literal_entry->literal_value = destination_store;
261 
262    if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
263       literal_entry->literal_value |=
264          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
265    }
266 
267    literal_entry->literal_value |=
268       destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
269 
270    if (data_mask == 0x1) {
271       literal_entry->literal_value |=
272          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
273    } else if (data_mask == 0x2) {
274       literal_entry->literal_value |=
275          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
276    } else {
277       literal_entry->literal_value |=
278          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
279    }
280 
281    if (last_dma) {
282       literal_entry->literal_value |=
283          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
284    }
285 
286    return instruction;
287 }
288 
289 /* Constant and Temporary register allocation
290  * - reserve space for a 32-bit register or a 64-bit register
291  * - returned indices are offsets to 32-bit register locations
292  * - 64-bit registers need to be aligned to even indices.
293  */
294 #define RESERVE_32BIT 1U
295 #define RESERVE_64BIT 2U
296 
/* Allocation wrappers: the register name is forwarded (for tracing) only when
 * MESA_DEBUG is set; otherwise NULL is passed in its place.
 *
 * Neither expansion ends in a semicolon, so both variants can be used as
 * expressions and behave the same way at every call site.
 */
#if MESA_DEBUG
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, name)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
#else
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, NULL)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
#endif
306 
307 static uint32_t
pvr_find_constant2(uint8_t * const_usage,uint8_t words,const char * const_name)308 pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
309 {
310    uint32_t const_index = ~0;
311    uint32_t step = words;
312    uint8_t mask = (1 << words) - 1;
313 
314    assert(words == 1 || words == 2);
315 
316    /* Find a register at 'step' alignment that satisfies the mask. */
317    for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
318       for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
319          if ((const_usage[i] & (mask << b)) != 0)
320             continue;
321          const_usage[i] |= (mask << b);
322          const_index = i * 8 + b;
323          pvr_debug_pds_const(const_index, words * 32, const_name);
324          return const_index;
325       }
326    }
327 
328    unreachable("Unexpected: Space cannot be found for constant");
329    return ~0;
330 }
331 
/* Number of 32-bit temps tracked by struct pvr_temp_usage. */
#define PVR_MAX_PDS_TEMPS 32

/* Bookkeeping for temp allocation. */
struct pvr_temp_usage {
   /* Bitmask of allocated temps; bit i set means temp i is taken. */
   uint32_t temp_usage;

   /* Number of 32-bit temps handed out so far. */
   uint8_t temp_used;

   /* One more than the index of the highest allocated temp. */
   uint8_t temps_needed;
};

/* Sentinel for "no temp allocated". */
#define PVR_INVALID_TEMP UINT8_C(~0)
340 
pvr_get_temps2(struct pvr_temp_usage * temps,uint8_t temps_needed,const char * temp_name)341 static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
342                               uint8_t temps_needed,
343                               const char *temp_name)
344 {
345    uint8_t step = temps_needed;
346    uint8_t mask = (1 << temps_needed) - 1;
347 
348    assert(temps_needed == 1 || temps_needed == 2);
349    assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
350 
351    for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
352       if ((temps->temp_usage & (mask << i)) != 0)
353          continue;
354 
355       const size_t clzBits = 8 * sizeof(unsigned int);
356 
357       temps->temp_usage |= (mask << i);
358       temps->temp_used += temps_needed;
359       temps->temps_needed =
360          clzBits - __builtin_clz((unsigned int)temps->temp_usage);
361 
362       pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
363 
364       return i;
365    }
366 
367    unreachable("Unexpected: Space cannot be found for temps");
368    return PVR_INVALID_TEMP;
369 }
370 
/**
 * Wrapper macro to add a toggle for "data mode", allowing us to calculate the
 * size of a PDS program without actually attempting to store it.
 *
 * The expansion is wrapped in do/while(0) so that it behaves as a single
 * statement, including inside an unbraced if/else.
 *
 * \param dest The array/memory pointer where the PDS program should be stored.
 *             If it is NULL, automatically switch to count mode instead of
 *             attempting to fill in unallocated memory.
 * \param counter The local counter that holds the total instruction count.
 *                It is incremented in both modes.
 * \param statement What function call/value should be stored at dest[counter]
 *                  when dest is not NULL. It is not evaluated in count mode.
 *                  NOTE(review): it also appears as the argument of
 *                  PVR_PDS_PRINT_INST, so it may be evaluated twice if that
 *                  macro uses its argument - confirm against its definition.
 */

#define PVR_PDS_MODE_TOGGLE(dest, counter, statement) \
   do {                                               \
      if (!(dest)) {                                  \
         (counter)++;                                 \
      } else {                                        \
         (dest)[(counter)++] = (statement);           \
         PVR_PDS_PRINT_INST(statement);               \
      }                                               \
   } while (0)
390 
391 /**
392  * Generates the PDS vertex primary program for the dma's listed in the input
393  * structure. Produces the constant map for the Vulkan driver based upon the
394  * requirements of the instructions added to the program.
395  *
396  * PDS Data Layout
397  * ---------------
398  *
399  * The PDS data is optimized for the DDMAD layout, with the data for those
400  * instructions laid out first. The data required for other instructions is laid
401  * out in the entries unused by the DDMADs.
402  *
403  * DDMAD layout
404  * \verbatim
405  * 	bank | index | usage
406  * 	0    |  0:1  | temps (current index)[-]
407  * 	2    |  2:3  | stride[32]
408  * 	1    |  4:5  | base address[64]
409  * 	3    |  6:7  | ctrl[64]
410  * \endverbatim
411  *
412  *  Each DMA whose stride > 0 requires one entry, laid out as above. We stride
413  * 	over the banks to ensure that each ddmad reads each of its operands from a
414  * 	different bank (i.e. remove bank clashes)
415  *
416  * 	Note: This is "wasting" const[0:1] and const[2], however these free
417  * 	registers will be used by other, non-ddmad instructions.
418  *
419  * 	The const register usage is maintained in the const_usage array. The
420  * DDMAD instructions, for example, will utilize the top 5 registers in each
421  * block of 8, hence a 'usage mask' of 0xF8 (0b11111000).
422  *
423  * 	Constant Map
424  * 	------------
425  *
426  * 	The constant map is built up as we add PDS instructions and passed back
427  * for the driver to fill in the PDS data section with the correct parameters
428  * for each draw call.
429  *
430  * \param input_program PDS Program description.
431  * \param code Buffer to be filled in with the PDS program. If NULL is provided,
432  *             automatically switch to count-mode, preventing writes to
433  *             unallocated memory.
434  * \param info PDS info structure filled in for the driver, contains the
435  *             constant map.
436  * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
437  * \param dev_info pvr device information struct.
438  */
pvr_pds_generate_vertex_primary_program(struct pvr_pds_vertex_primary_program_input * input_program,uint32_t * code,struct pvr_pds_info * info,bool use_robust_vertex_fetch,const struct pvr_device_info * dev_info)439 void pvr_pds_generate_vertex_primary_program(
440    struct pvr_pds_vertex_primary_program_input *input_program,
441    uint32_t *code,
442    struct pvr_pds_info *info,
443    bool use_robust_vertex_fetch,
444    const struct pvr_device_info *dev_info)
445 {
446    struct pvr_pds_const_map_entry_write_state entry_write_state;
447    struct pvr_const_map_entry_doutu_address *doutu_address_entry;
448 
449    uint32_t instruction = 0; /* index into code */
450    uint32_t index; /* index used for current attribute, either vertex or
451                     * instance.
452                     */
453 
454    uint32_t total_dma_count = 0;
455    uint32_t running_dma_count = 0;
456 
457    uint32_t write_instance_control = ~0;
458    uint32_t write_vertex_control = ~0;
459    uint32_t write_base_instance_control = ~0;
460    uint32_t write_base_vertex_control = ~0;
461    uint32_t pvr_write_draw_index_control = ~0;
462 
463    uint32_t ddmad_count = 0;
464    uint32_t doutw_count = 0;
465 
466    uint32_t base_instance = 0;
467    uint32_t base_vertex = 0;
468    uint32_t draw_index = 0;
469 
470    uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
471 
472    struct pvr_temp_usage temp_usage = { 0 };
473 
474    uint32_t zero_temp = PVR_INVALID_TEMP;
475 
476    uint32_t max_index_temp = PVR_INVALID_TEMP;
477    uint32_t current_index_temp = PVR_INVALID_TEMP;
478 
479    uint32_t index_id_temp = PVR_INVALID_TEMP;
480    uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
481    uint32_t instance_ID_temp = PVR_INVALID_TEMP;
482 
483    /* Debug tracing of program flags. */
484    pvr_debug("pvr_pds_generate_vertex_primary_program");
485    pvr_debug("=================================================");
486    pvr_debug_pds_flag(input_program->flags,
487                       PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
488    pvr_debug_pds_flag(input_program->flags,
489                       PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
490    pvr_debug_pds_flag(input_program->flags,
491                       PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
492    pvr_debug_pds_flag(input_program->flags,
493                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
494    pvr_debug_pds_flag(input_program->flags,
495                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
496    pvr_debug_pds_flag(input_program->flags,
497                       PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
498    pvr_debug_pds_flag(input_program->flags,
499                       PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
500    pvr_debug(" ");
501 
502    pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
503 
504    /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
505     * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
506     */
507    info->data_size_in_dwords = 4;
508 
509    /* Reserve 2 temps - these are automatically filled in by the VDM
510     *
511     * For instanced draw calls we manually increment the instance id by the
512     * base-instance offset which is either provided as a constant, or in a
513     * ptemp (for draw indirect)
514     *
515     * temp - contents
516     * ---------------
517     * 0    - index id (pre-filled)
518     * 1    - base instance + instance id
519     */
520    index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
521    instance_ID_temp =
522       pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
523 
524    /* Reserve the lowest 2 dwords for DOUTU.
525     * [------XX]
526     */
527    const_usage[0] = 0x03;
528 
529    /* Reserve consts for all the DDMAD's. */
530    for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
531       /* Mark the consts required by this ddmad "in-use".
532        * [XXXXX---]
533        */
534       const_usage[ddmad_count++] |= 0xf8;
535    }
536 
537    /* Start off by assuming we can fit everything in the 8 dwords/ddmad
538     * footprint, if any DOUTD/DOUTW falls outside we will increase this
539     * counter.
540     */
541    if (ddmad_count)
542       info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
543 
544    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
545       doutw_count++;
546       write_vertex_control =
547          pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
548    }
549 
550    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
551       doutw_count++;
552       write_instance_control = pvr_find_constant(const_usage,
553                                                  RESERVE_32BIT,
554                                                  "Instance id DOUTW Ctrl");
555    }
556 
557    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
558       doutw_count++;
559       write_base_instance_control =
560          pvr_find_constant(const_usage,
561                            RESERVE_32BIT,
562                            "Base Instance DOUTW Ctrl");
563    }
564 
565    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
566       doutw_count++;
567       write_base_vertex_control = pvr_find_constant(const_usage,
568                                                     RESERVE_32BIT,
569                                                     "Base Vertex DOUTW Ctrl");
570 
571       /* Load base vertex from constant for non-indirect variants. */
572       if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
573           0) {
574          struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
575             (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
576 
577          base_vertex =
578             pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
579 
580          psBaseVertexEntry =
581             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
582                                                  sizeof(*psBaseVertexEntry));
583          psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
584          psBaseVertexEntry->const_offset = base_vertex;
585       }
586    }
587 
588    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
589       doutw_count++;
590       pvr_write_draw_index_control =
591          pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
592 
593       /* Set draw index to 0 for non-indirect variants. */
594       if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
595           0) {
596          struct pvr_const_map_entry_literal32 *literal_entry;
597 
598          draw_index =
599             pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
600 
601          literal_entry =
602             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
603                                                  sizeof(*literal_entry));
604          literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
605          literal_entry->const_offset = draw_index;
606          literal_entry->literal_value = 0;
607       }
608    }
609 
610    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
611       /* Load absolute instance id into uiInstanceIdTemp. */
612       PVR_PDS_MODE_TOGGLE(
613          code,
614          instruction,
615          pvr_pds_inst_encode_add32(
616             /* cc    */ 0,
617             /* alum  */ 0,
618             /* sna   */ 0,
619             /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
620             /* src1  */ R32_T(instance_ID_temp),
621             /* dst   */ R32TP_T(instance_ID_temp)));
622    } else if (input_program->flags &
623               PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
624       struct pvr_const_map_entry_base_instance *base_instance_entry =
625          (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
626 
627       base_instance =
628          pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
629 
630       PVR_PDS_MODE_TOGGLE(code,
631                           instruction,
632                           pvr_pds_inst_encode_add32(
633                              /* cc    */ 0,
634                              /* alum  */ 0,
635                              /* sna   */ 0,
636                              /* src0  */ R32_C(base_instance),
637                              /* src1  */ R32_T(instance_ID_temp),
638                              /* dst   */ R32TP_T(instance_ID_temp)));
639 
640       base_instance_entry =
641          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
642                                               sizeof(*base_instance_entry));
643       base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
644       base_instance_entry->const_offset = base_instance;
645    } else if (input_program->flags &
646               PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
647       struct pvr_const_map_entry_base_instance *base_instance_entry =
648          (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
649 
650       base_instance = pvr_find_constant(const_usage,
651                                         RESERVE_32BIT,
652                                         "base_instance (Driver Const)");
653 
654       /* Base instance provided by the driver. */
655       base_instance_entry =
656          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
657                                               sizeof(*base_instance_entry));
658       base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
659       base_instance_entry->const_offset = base_instance;
660    }
661 
662    total_dma_count = ddmad_count;
663 
664    total_dma_count += doutw_count;
665 
666    if (use_robust_vertex_fetch) {
667       pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
668 
669       if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
670          zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
671 
672          /* Load 0 into instance_ID_temp. */
673          PVR_PDS_MODE_TOGGLE(code,
674                              instruction,
675                              pvr_pds_inst_encode_limm(0, /* cc */
676                                                       zero_temp, /* SRC1 */
677                                                       0, /* SRC0 */
678                                                       0 /* GR */
679                                                       ));
680       } else {
681          zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
682 
683          max_index_temp =
684             pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
685          current_index_temp =
686             pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
687 
688          PVR_PDS_MODE_TOGGLE(code,
689                              instruction,
690                              pvr_pds_inst_encode_sftlp64(
691                                 0, /* cc */
692                                 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
693                                 1, /* IM */
694                                 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
695                                                           */
696                                 R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
697                                                           */
698                                 0, /* SRC2 (REGS32) */
699                                 R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
700                                 ));
701          PVR_PDS_MODE_TOGGLE(code,
702                              instruction,
703                              pvr_pds_inst_encode_sftlp64(
704                                 0, /* cc */
705                                 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
706                                 1, /* IM */
707                                 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
708                                                           */
709                                 0, /* SRC1 (REGS64TP) */
710                                 0, /* SRC2 (REGS32) */
711                                 R64TP_T(current_index_temp >> 1) /* DST */
712                                 /* (REG64TP) */
713                                 ));
714          PVR_PDS_MODE_TOGGLE(code,
715                              instruction,
716                              pvr_pds_inst_encode_sftlp64(
717                                 0, /* cc */
718                                 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
719                                 1, /* IM */
720                                 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
721                                                           */
722                                 0, /* SRC1 (REGS64TP) */
723                                 0, /* SRC2 (REGS32) */
724                                 R64TP_T(max_index_temp >> 1) /* DST */
725                                 /* (REG64TP) */
726                                 ));
727       }
728    }
729 
730    if (input_program->dma_count && use_robust_vertex_fetch) {
731       PVR_PDS_MODE_TOGGLE(
732          code,
733          instruction,
734          pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
735                                  0, /* Neg */
736                                  PVR_HAS_FEATURE(dev_info, pds_ddmadt)
737                                     ? PVR_ROGUE_PDSINST_PREDICATE_OOB
738                                     : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
739                                  1 /* Addr */
740                                  ));
741    }
742 
743    for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
744       uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
745       uint32_t control_word;
746       struct pvr_const_map_entry_literal32 *literal_entry;
747 
748       const struct pvr_pds_vertex_dma *vertex_dma =
749          &input_program->dma_list[dma];
750       bool last_dma = (++running_dma_count == total_dma_count);
751 
752       pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_dma);
753 
754       /* The id we use to index into this dma. */
755       if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
756          pvr_debug_pds_note("Instance Rate (divisor = %d)",
757                             vertex_dma->divisor);
758 
759          /* 4    - madd 0 - needs to be 64-bit aligned
760           * 5    - madd 1
761           */
762          if (vertex_dma->divisor > 1) {
763             const uint32_t adjusted_instance_ID_temp =
764                pvr_get_temps(&temp_usage,
765                              RESERVE_64BIT,
766                              "adjusted_instance_ID_temp");
767             const uint32_t MADD_temp =
768                pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
769 
770             /* 1. Remove base instance value from temp 1 to get instance id
771              * 2. Divide the instance id by the divisor - Iout = (Iin *
772              *    Multiplier) >> (shift+31)
773              * 3. Add the base instance back on.
774              *
775              * Need two zero temps for the add part of the later MAD.
776              */
777 
778             PVR_PDS_MODE_TOGGLE(code,
779                                 instruction,
780                                 pvr_pds_inst_encode_add64(
781                                    /* cc    */ 0,
782                                    /* alum  */ 0,
783                                    /* sna   */ 1,
784                                    /* src0  */ R64_T(MADD_temp >> 1),
785                                    /* src1  */ R64_T(MADD_temp >> 1),
786                                    /* dst   */ R64TP_T(MADD_temp >> 1)));
787 
788             if (input_program->flags &
789                 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
790                /* Subtract base instance from temp 1, put into
791                 * adjusted_instance_ID_temp.
792                 */
793                PVR_PDS_MODE_TOGGLE(
794                   code,
795                   instruction,
796                   pvr_pds_inst_encode_add32(
797                      /* cc    */ 0,
798                      /* alum  */ 0,
799                      /* sna   */ 1,
800                      /* src0  */ R32_T(instance_ID_temp),
801                      /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
802                      /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
803             } else if (input_program->flags &
804                        PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
805                /* Subtract base instance from temp 1, put into
806                 * adjusted_instance_ID_temp.
807                 */
808                PVR_PDS_MODE_TOGGLE(
809                   code,
810                   instruction,
811                   pvr_pds_inst_encode_add32(
812                      /* cc    */ 0,
813                      /* alum  */ 0,
814                      /* sna   */ 1,
815                      /* src0  */ R32_T(instance_ID_temp),
816                      /* src1  */ R32_C(base_instance),
817                      /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
818             } else {
819                /* Copy instance from temp 1 to adjusted_instance_ID_temp.
820                 */
821                PVR_PDS_MODE_TOGGLE(
822                   code,
823                   instruction,
824                   pvr_pds_inst_encode_add32(
825                      /* cc    */ 0,
826                      /* alum  */ 0,
827                      /* sna   */ 0,
828                      /* src0  */ R32_T(instance_ID_temp),
829                      /* src1  */ R32_T(MADD_temp), /* MADD_temp is set
830                                                     * to 0 at this point.
831                                                     */
832                      /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
833             }
834 
835             /* shift = the bit of the next highest power of two. */
836             uint32_t shift_unsigned =
837                (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
838             int32_t shift = (int32_t)shift_unsigned;
839             uint32_t shift_2s_comp;
840 
841             pvr_debug_pds_note(
842                "Perform instance rate divide (as integer multiply and rshift)");
843 
844             const uint32_t multipier_constant =
845                pvr_find_constant(const_usage,
846                                  RESERVE_32BIT,
847                                  "MultiplierConstant (for InstanceDivisor)");
848 
849             /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
850                note: the division above is integer division. */
851             uint64_t multipier64 =
852                (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
853                            ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
854                           (uint64_t)vertex_dma->divisor);
855             uint32_t multiplier = (uint32_t)multipier64;
856 
857             pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
858                                multiplier);
859             pvr_debug_pds_note(" - Value of Shift = %d", shift);
860 
861             literal_entry =
862                pvr_prepare_next_pds_const_map_entry(&entry_write_state,
863                                                     sizeof(*literal_entry));
864             literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
865             literal_entry->const_offset = multipier_constant;
866             literal_entry->literal_value = multiplier;
867 
868             /* (Iin * Multiplier) */
869             PVR_PDS_MODE_TOGGLE(
870                code,
871                instruction,
872                pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
873                                          0, /* Unsigned ALU mode */
874                                          0, /* Unconditional */
875                                          R32_C(multipier_constant),
876                                          R32_T(adjusted_instance_ID_temp),
877                                          R64_T(MADD_temp / 2),
878                                          R64TP_T(MADD_temp / 2)));
879 
880             /*  >> (shift + 31) */
881             shift += 31;
882             shift *= -1;
883 
884             if (shift < -31) {
885                /* >> (31) */
886                shift_2s_comp = 0xFFFE1;
887                PVR_PDS_MODE_TOGGLE(code,
888                                    instruction,
889                                    pvr_pds_inst_encode_sftlp64(
890                                       /* cc */ 0,
891                                       /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
892                                       /* IM */ 1, /*  enable immediate */
893                                       /* SRC0 */ R64_T(MADD_temp / 2),
894                                       /* SRC1 */ 0, /* This won't be used
895                                                        in a shift
896                                                        operation. */
897                                       /* SRC2 (Shift) */ shift_2s_comp,
898                                       /* DST */ R64TP_T(MADD_temp / 2)));
899                shift += 31;
900             }
901 
902             /* >> (shift + 31) */
903             shift_2s_comp = *((uint32_t *)&shift);
904             PVR_PDS_MODE_TOGGLE(code,
905                                 instruction,
906                                 pvr_pds_inst_encode_sftlp64(
907                                    /* cc */ 0,
908                                    /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
909                                    /* IM */ 1, /*  enable immediate */
910                                    /* SRC0 */ R64_T(MADD_temp / 2),
911                                    /* SRC1 */ 0, /* This won't be used
912                                                   * in a shift
913                                                   * operation. */
914                                    /* SRC2 (Shift) */ shift_2s_comp,
915                                    /* DST */ R64TP_T(MADD_temp / 2)));
916 
917             if (input_program->flags &
918                 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
919                /* Add base instance. */
920                PVR_PDS_MODE_TOGGLE(
921                   code,
922                   instruction,
923                   pvr_pds_inst_encode_add32(
924                      /* cc    */ 0,
925                      /* alum  */ 0,
926                      /* sna   */ 0,
927                      /* src0  */ R32_T(MADD_temp),
928                      /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
929                      /* dst   */ R32TP_T(MADD_temp)));
930             } else if (input_program->flags &
931                        PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
932                /* Add base instance. */
933                PVR_PDS_MODE_TOGGLE(code,
934                                    instruction,
935                                    pvr_pds_inst_encode_add32(
936                                       /* cc    */ 0,
937                                       /* alum  */ 0,
938                                       /* sna   */ 0,
939                                       /* src0  */ R32_T(MADD_temp),
940                                       /* src1  */ R32_C(base_instance),
941                                       /* dst   */ R32TP_T(MADD_temp)));
942             }
943 
944             pvr_debug_pds_note(
945                "DMA Vertex Index will be sourced from 'MADD_temp'");
946             index = MADD_temp;
947          } else if (vertex_dma->divisor == 0) {
948             if (base_instance_ID_temp == PVR_INVALID_TEMP) {
949                base_instance_ID_temp = pvr_get_temps(&temp_usage,
950                                                      RESERVE_32BIT,
951                                                      "uBaseInstanceIDTemp");
952             }
953 
954             /* Load 0 into base_instance_ID_temp. */
955             PVR_PDS_MODE_TOGGLE(code,
956                                 instruction,
957                                 pvr_pds_inst_encode_limm(
958                                    /* cc       */ 0,
959                                    /* src1     */ base_instance_ID_temp,
960                                    /* src0     */ 0,
961                                    /* gr       */ 0));
962 
963             if (input_program->flags &
964                 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
965                /* Add base instance. */
966                PVR_PDS_MODE_TOGGLE(
967                   code,
968                   instruction,
969                   pvr_pds_inst_encode_add32(
970                      /* cc    */ 0,
971                      /* alum  */ 0,
972                      /* sna   */ 0,
973                      /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
974                      /* src1  */ R32_T(base_instance_ID_temp),
975                      /* dst   */ R32TP_T(base_instance_ID_temp)));
976 
977             } else if (input_program->flags &
978                        PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
979                /* Add base instance. */
980                PVR_PDS_MODE_TOGGLE(
981                   code,
982                   instruction,
983                   pvr_pds_inst_encode_add32(
984                      /* cc    */ 0,
985                      /* alum  */ 0,
986                      /* sna   */ 0,
987                      /* src0  */ R32_C(base_instance),
988                      /* src1  */ R32_T(base_instance_ID_temp),
989                      /* dst   */ R32TP_T(base_instance_ID_temp)));
990             }
991 
992             pvr_debug_pds_note(
993                "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
994             index = base_instance_ID_temp;
995          } else {
996             pvr_debug_pds_note(
997                "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
998             index = instance_ID_temp;
999          }
1000       } else {
1001          pvr_debug_pds_note(
1002             "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
1003          index = index_id_temp;
1004       }
1005 
1006       /* DDMAD Const Usage [__XX_---] */
1007       pvr_write_pds_const_map_entry_vertex_attribute_address(
1008          &entry_write_state,
1009          vertex_dma,
1010          const_base + 4,
1011          use_robust_vertex_fetch);
1012 
1013       /* DDMAD Const Usage [__XXX---] */
1014       literal_entry =
1015          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1016                                               sizeof(*literal_entry));
1017       literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1018       literal_entry->const_offset = const_base + 3;
1019       literal_entry->literal_value = vertex_dma->stride;
1020 
1021       control_word = vertex_dma->size_in_dwords
1022                      << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1023       control_word |= vertex_dma->destination
1024                       << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1025       control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1026                        PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1027 
1028       /* DDMADT instructions will do a dummy doutd when OOB if
1029        * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
1030        * would need to do another doutd after an OOB DDMADT to provide the 'in
1031        * bounds' data the DDMADT can't be set as LAST.
1032        *
1033        * This requires us to include a final dummy DDMAD.LAST instruction.
1034        *
1035        * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
1036        * Specification.doc
1037        *
1038        *	DDMAD src0,src1,src2,src3
1039        *
1040        *	calculated_source_address := src0*src1+src2
1041        *	base_address              := src2
1042        *	dma_parameters            := src3[31:0]
1043        *	buffer_size               := src3[63:33]
1044        *	test                      := src3[32]
1045        *
1046        *	if (test == 1) {
1047        *	   // DDMAD(T)
1048        *	   if (calculated_source_address[39:0] + (burst_size<<2) <=
1049        *         base_address[39:0] + buffer_size) {
1050        *        OOB := 0
1051        *        DOUTD calculated_source_address,dma_parameters
1052        *     } else {
1053        *        OOB := 1
1054        *        if (last_instance == 1) {
1055        *           dma_parameters[BURST_SIZE] := 0
1056        *           DOUTD calculated_source_address,dma_parameters
1057        *	      }
1058        *	   }
1059        *	} else {
1060        *	   // DDMAD
1061        *	   DOUTD calculated_source_address,dma_parameters
1062        *	}
1063        */
1064 
1065       if (last_dma && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
1066                        !use_robust_vertex_fetch)) {
1067          pvr_debug_pds_note("LAST DDMAD");
1068          control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1069       }
1070 
1071       /* DDMAD Const Usage [_XXXX---] */
1072       literal_entry =
1073          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1074                                               sizeof(*literal_entry));
1075       literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1076       literal_entry->const_offset = (const_base + 6);
1077       literal_entry->literal_value = control_word;
1078 
1079       if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1080          /* DDMAD Const Usage [XXXXX---]
1081           * With DDMADT an extra 32bits of SRC3 contains the information for
1082           * performing out-of-bounds tests on the DMA.
1083           */
1084 
1085          if (use_robust_vertex_fetch) {
1086             struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
1087                *obb_buffer_size;
1088             obb_buffer_size =
1089                pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1090                                                     sizeof(*obb_buffer_size));
1091 
1092             obb_buffer_size->type =
1093                PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
1094             obb_buffer_size->const_offset = const_base + 7;
1095             obb_buffer_size->binding_index = vertex_dma->binding_index;
1096          } else {
1097             literal_entry =
1098                pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1099                                                     sizeof(*literal_entry));
1100             literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1101             literal_entry->const_offset = const_base + 7;
1102             literal_entry->literal_value = 0;
1103          }
1104 
1105          PVR_PDS_MODE_TOGGLE(
1106             code,
1107             instruction,
1108             pvr_pds_inst_encode_ddmad(0, /* cc */
1109                                       0, /* END */
1110                                       R32_C(const_base + 3), /* SRC0 (REGS32) */
1111                                       index, /* SRC1 (REGS32T) */
1112                                       R64_C((const_base + 4) >> 1), /* SRC2
1113                                                                      * (REGS64)
1114                                                                      */
1115                                       R64_C((const_base + 6) >> 1) /* SRC3
1116                                                                     * (REGS64C)
1117                                                                     */
1118                                       ));
1119 
1120          if (use_robust_vertex_fetch) {
1121             /* If not out of bounds, skip next DDMAD instructions. */
1122             PVR_PDS_MODE_TOGGLE(code,
1123                                 instruction,
1124                                 pvr_pds_inst_encode_ddmad(
1125                                    1, /* cc */
1126                                    0, /* END */
1127                                    R32_C(const_base + 3), /* SRC0 (REGS32) */
1128                                    R32_T(zero_temp), /* SRC1 (REGS32T) */
1129                                    R64_C((const_base + 4) >> 1), /* SRC2
1130                                                                   * (REGS64)
1131                                                                   */
1132                                    R64_C((const_base + 6) >> 1) /* SRC3
1133                                                                  * (REGS64C)
1134                                                                  */
1135                                    ));
1136 
1137             /* Now the driver must have a dummy DDMAD marked as last. */
1138             if (last_dma) {
1139                uint32_t dummy_dma_const = pvr_find_constant(const_usage,
1140                                                             RESERVE_64BIT,
1141                                                             "uDummyDMAConst");
1142                uint32_t zero_const =
1143                   pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
1144 
1145                literal_entry =
1146                   pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1147                                                        sizeof(*literal_entry));
1148                literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1149                literal_entry->const_offset = zero_const;
1150                literal_entry->literal_value = 0;
1151 
1152                literal_entry =
1153                   pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1154                                                        sizeof(*literal_entry));
1155                literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1156                literal_entry->const_offset = zero_const + 1;
1157                literal_entry->literal_value = 0;
1158 
1159                literal_entry =
1160                   pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1161                                                        sizeof(*literal_entry));
1162                literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1163                literal_entry->const_offset = dummy_dma_const;
1164                literal_entry->literal_value = 0;
1165 
1166                literal_entry->literal_value |=
1167                   0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1168                literal_entry->literal_value |=
1169                   (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1170                    PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1171                literal_entry->literal_value |=
1172                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1173 
1174                literal_entry =
1175                   pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1176                                                        sizeof(*literal_entry));
1177                literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1178                literal_entry->const_offset = dummy_dma_const + 1;
1179                literal_entry->literal_value = 0;
1180 
1181                PVR_PDS_MODE_TOGGLE(code,
1182                                    instruction,
1183                                    pvr_pds_inst_encode_ddmad(
1184                                       0, /* cc */
1185                                       0, /* END */
1186                                       R32_C(zero_const), /* SRC0 (REGS32)
1187                                                           */
1188                                       R32_T(zero_temp), /* SRC1 (REGS32T)
1189                                                          */
1190                                       R64_C((dummy_dma_const) >> 1), /* SRC2
1191                                                                         (REGS64)
1192                                                                      */
1193                                       R64_C((dummy_dma_const) >> 1) /* SRC3
1194                                                                        (REGS64C)
1195                                                                     */
1196                                       ));
1197             }
1198          }
1199       } else {
1200          if (use_robust_vertex_fetch) {
1201             struct pvr_const_map_entry_vertex_attribute_max_index
1202                *max_index_entry;
1203 
1204             pvr_debug("RobustVertexFetch DDMAD");
1205 
1206             const uint32_t max_index_const =
1207                pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
1208 
1209             max_index_entry =
1210                pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1211                                                     sizeof(*max_index_entry));
1212             max_index_entry->const_offset = max_index_const;
1213             max_index_entry->type =
1214                PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
1215             max_index_entry->binding_index = vertex_dma->binding_index;
1216             max_index_entry->offset = vertex_dma->offset;
1217             max_index_entry->stride = vertex_dma->stride;
1218             max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
1219             max_index_entry->component_size_in_bytes =
1220                vertex_dma->component_size_in_bytes;
1221 
1222             PVR_PDS_MODE_TOGGLE(
1223                code,
1224                instruction,
1225                pvr_pds_inst_encode_add32(0, /* cc */
1226                                          0, /* ALUM */
1227                                          PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
1228                                          R32_C(max_index_const), /* SRC0
1229                                                                   * (REGS32)
1230                                                                   */
1231                                          R32_T(zero_temp), /* SRC1 (REGS32) */
1232                                          R32TP_T(max_index_temp) /* DST
1233                                                                   * (REG32TP)
1234                                                                   */
1235                                          ));
1236 
1237             PVR_PDS_MODE_TOGGLE(code,
1238                                 instruction,
1239                                 pvr_pds_inst_encode_sftlp32(
1240                                    1, /* IM */
1241                                    0, /* cc */
1242                                    PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1243                                    index, /* SRC0 (REGS32T) */
1244                                    0, /* SRC1 (REGS32) */
1245                                    0, /* SRC2 (REG32TP) */
1246                                    R32TP_T(current_index_temp) /* DST
1247                                                                 * (REG32TP)
1248                                                                 */
1249                                    ));
1250 
1251             PVR_PDS_MODE_TOGGLE(
1252                code,
1253                instruction,
1254                pvr_pds_inst_encode_cmp(
1255                   0, /* cc enable */
1256                   PVR_ROGUE_PDSINST_COP_GT, /* Operation */
1257                   R64TP_T(current_index_temp >> 1), /* SRC
1258                                                      * (REGS64TP)
1259                                                      */
1260                   R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
1261                   ));
1262 
1263             PVR_PDS_MODE_TOGGLE(code,
1264                                 instruction,
1265                                 pvr_pds_inst_encode_sftlp32(
1266                                    1, /* IM */
1267                                    1, /* cc */
1268                                    PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1269                                    zero_temp, /* SRC0 (REGS32T) */
1270                                    0, /* SRC1 (REGS32) */
1271                                    0, /* SRC2 (REG32TP) */
1272                                    R32TP_T(current_index_temp) /* DST
1273                                                                 * (REG32TP)
1274                                                                 */
1275                                    ));
1276 
1277             PVR_PDS_MODE_TOGGLE(code,
1278                                 instruction,
1279                                 pvr_pds_inst_encode_ddmad(
1280                                    0, /* cc  */
1281                                    0, /* END */
1282                                    R32_C(const_base + 3), /* SRC0 (REGS32) */
1283                                    current_index_temp, /* SRC1 (REGS32T) */
1284                                    R64_C((const_base + 4) >> 1), /* SRC2
1285                                                                   * (REGS64)
1286                                                                   */
1287                                    (const_base + 6) >> 1 /* SRC3 (REGS64C) */
1288                                    ));
1289          } else {
1290             PVR_PDS_MODE_TOGGLE(code,
1291                                 instruction,
1292                                 pvr_pds_inst_encode_ddmad(
1293                                    /* cc    */ 0,
1294                                    /* end   */ 0,
1295                                    /* src0  */ R32_C(const_base + 3),
1296                                    /* src1  */ (index),
1297                                    /* src2  */ R64_C((const_base + 4) >> 1),
1298                                    /* src3  */ (const_base + 6) >> 1));
1299          }
1300       }
1301    }
1302 
1303    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
1304       bool last_dma = (++running_dma_count == total_dma_count);
1305 
1306       PVR_PDS_MODE_TOGGLE(
1307          code,
1308          instruction,
1309          pvr_encode_direct_write(
1310             &entry_write_state,
1311             last_dma,
1312             false,
1313             R64_C(write_vertex_control),
1314             R64_T(0),
1315             0x1,
1316             input_program->vertex_id_register,
1317             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1318             dev_info));
1319    }
1320 
1321    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
1322       bool last_dma = (++running_dma_count == total_dma_count);
1323 
1324       PVR_PDS_MODE_TOGGLE(
1325          code,
1326          instruction,
1327          pvr_encode_direct_write(
1328             &entry_write_state,
1329             last_dma,
1330             false,
1331             R64_C(write_instance_control),
1332             R64_T(0),
1333             0x2,
1334             input_program->instance_id_register,
1335             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1336             dev_info));
1337    }
1338 
1339    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
1340       bool last_dma = (++running_dma_count == total_dma_count);
1341 
1342       if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1343          /* Base instance comes from ptemp 1. */
1344          PVR_PDS_MODE_TOGGLE(
1345             code,
1346             instruction,
1347             pvr_encode_direct_write(
1348                &entry_write_state,
1349                last_dma,
1350                false,
1351                R64_C(write_base_instance_control),
1352                R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
1353                0x2,
1354                input_program->base_instance_register,
1355                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1356                dev_info));
1357       } else {
1358          uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
1359 
1360          /* Base instance comes from driver constant. */
1361          PVR_PDS_MODE_TOGGLE(
1362             code,
1363             instruction,
1364             pvr_encode_direct_write(
1365                &entry_write_state,
1366                last_dma,
1367                false,
1368                R64_C(write_base_instance_control),
1369                R64_C(base_instance >> 1),
1370                data_mask,
1371                input_program->base_instance_register,
1372                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1373                dev_info));
1374       }
1375    }
1376 
1377    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
1378       bool last_dma = (++running_dma_count == total_dma_count);
1379 
1380       if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1381          /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
1382          PVR_PDS_MODE_TOGGLE(
1383             code,
1384             instruction,
1385             pvr_encode_direct_write(
1386                &entry_write_state,
1387                last_dma,
1388                false,
1389                R64_C(write_base_vertex_control),
1390                R64_P(0),
1391                0x1,
1392                input_program->base_vertex_register,
1393                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1394                dev_info));
1395       } else {
1396          uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
1397 
1398          /* Base vertex comes from driver constant (literal 0). */
1399          PVR_PDS_MODE_TOGGLE(
1400             code,
1401             instruction,
1402             pvr_encode_direct_write(
1403                &entry_write_state,
1404                last_dma,
1405                false,
1406                R64_C(write_base_vertex_control),
1407                R64_C(base_vertex >> 1),
1408                data_mask,
1409                input_program->base_vertex_register,
1410                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1411                dev_info));
1412       }
1413    }
1414 
1415    if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
1416       bool last_dma = (++running_dma_count == total_dma_count);
1417 
1418       if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1419          /* Draw index comes from ptemp 3. */
1420          PVR_PDS_MODE_TOGGLE(
1421             code,
1422             instruction,
1423             pvr_encode_direct_write(
1424                &entry_write_state,
1425                last_dma,
1426                false,
1427                R64_C(pvr_write_draw_index_control),
1428                R64_P(1),
1429                0x2,
1430                input_program->draw_index_register,
1431                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1432                dev_info));
1433       } else {
1434          uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
1435 
1436          /* Draw index comes from constant (literal 0). */
1437          PVR_PDS_MODE_TOGGLE(
1438             code,
1439             instruction,
1440             pvr_encode_direct_write(
1441                &entry_write_state,
1442                last_dma,
1443                false,
1444                R64_C(pvr_write_draw_index_control),
1445                R64_C(draw_index >> 1),
1446                data_mask,
1447                input_program->draw_index_register,
1448                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1449                dev_info));
1450       }
1451    }
1452 
1453    doutu_address_entry =
1454       pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1455                                            sizeof(*doutu_address_entry));
1456    doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1457    doutu_address_entry->const_offset = 0;
1458    doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
1459 
1460    if (use_robust_vertex_fetch) {
1461       /* Restore IF0 */
1462       PVR_PDS_MODE_TOGGLE(
1463          code,
1464          instruction,
1465          pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
1466                                  0, /* Neg */
1467                                  PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
1468                                  1 /* Addr */
1469                                  ));
1470    }
1471 
1472    PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
1473    PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
1474 
1475    assert(running_dma_count == total_dma_count);
1476 
1477    for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
1478       if (const_usage[i] == 0)
1479          break;
1480 
1481       info->data_size_in_dwords =
1482          8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
1483    }
1484 
1485    info->temps_required = temp_usage.temps_needed;
1486    info->entry_count = entry_write_state.entry_count;
1487    info->entries_written_size_in_bytes =
1488       entry_write_state.entries_size_in_bytes;
1489    info->code_size_in_dwords = instruction;
1490 
1491    pvr_debug("=================================================\n");
1492 }
1493 
pvr_pds_generate_descriptor_upload_program(struct pvr_pds_descriptor_program_input * input_program,uint32_t * code_section,struct pvr_pds_info * info)1494 void pvr_pds_generate_descriptor_upload_program(
1495    struct pvr_pds_descriptor_program_input *input_program,
1496    uint32_t *code_section,
1497    struct pvr_pds_info *info)
1498 {
1499    unsigned int num_consts64;
1500    unsigned int num_consts32;
1501    unsigned int next_const64;
1502    unsigned int next_const32;
1503    unsigned int instruction = 0;
1504    uint32_t compile_time_buffer_index = 0;
1505 
1506    unsigned int total_dma_count = 0;
1507    unsigned int running_dma_count = 0;
1508 
1509    struct pvr_pds_const_map_entry_write_state entry_write_state;
1510 
1511    /* Calculate the total register usage so we can stick 32-bit consts
1512     * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
1513     * constant.
1514     */
1515    num_consts32 = input_program->descriptor_set_count;
1516    num_consts64 = input_program->descriptor_set_count;
1517    total_dma_count = input_program->descriptor_set_count;
1518 
1519    /* 1 DOUTD for buffer containing address literals. */
1520    if (input_program->addr_literal_count > 0) {
1521       num_consts32++;
1522       num_consts64++;
1523       total_dma_count++;
1524    }
1525 
1526    pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
1527 
1528    for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1529       struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1530 
1531       /* This switch statement looks pointless but we want to optimize DMAs
1532        * that can be done as a DOUTW.
1533        */
1534       switch (buffer->type) {
1535       default: {
1536          /* 1 DOUTD per compile time buffer: */
1537          num_consts32++;
1538          num_consts64++;
1539          total_dma_count++;
1540          break;
1541       }
1542       }
1543    }
1544 
1545    /* DOUTU for the secondary update program requires a 64-bit constant. */
1546    if (input_program->secondary_program_present)
1547       num_consts64++;
1548 
1549    info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
1550 
1551    /* Start counting constants. */
1552    next_const64 = 0;
1553    next_const32 = num_consts64 * 2;
1554 
1555    if (input_program->addr_literal_count > 0) {
1556       bool last_dma = (++running_dma_count == total_dma_count);
1557       bool halt = last_dma && !input_program->secondary_program_present;
1558 
1559       unsigned int size_in_dwords = input_program->addr_literal_count *
1560                                     sizeof(uint64_t) / sizeof(uint32_t);
1561       unsigned int destination = input_program->addr_literals[0].destination;
1562 
1563       struct pvr_pds_const_map_entry_addr_literal_buffer
1564          *addr_literal_buffer_entry;
1565 
1566       addr_literal_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1567          &entry_write_state,
1568          sizeof(*addr_literal_buffer_entry));
1569 
1570       addr_literal_buffer_entry->type =
1571          PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL_BUFFER;
1572       addr_literal_buffer_entry->size = PVR_DW_TO_BYTES(size_in_dwords);
1573       addr_literal_buffer_entry->const_offset = next_const64 * 2;
1574 
1575       for (unsigned int i = 0; i < input_program->addr_literal_count; i++) {
1576          struct pvr_pds_const_map_entry_addr_literal *addr_literal_entry;
1577 
1578          /* Check that the destinations for the addr literals are contiguous.
1579           * Not supporting non contiguous ranges as that would either require a
1580           * single large buffer with wasted memory for DMA, or multiple buffers
1581           * to DMA.
1582           */
1583          if (i > 0) {
1584             const uint32_t current_addr_literal_destination =
1585                input_program->addr_literals[i].destination;
1586             const uint32_t previous_addr_literal_destination =
1587                input_program->addr_literals[i - 1].destination;
1588 
1589             /* 2 regs to store 64 bits address. */
1590             assert(current_addr_literal_destination ==
1591                    previous_addr_literal_destination + 2);
1592          }
1593 
1594          addr_literal_entry =
1595             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1596                                                  sizeof(*addr_literal_entry));
1597 
1598          addr_literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_ADDR_LITERAL;
1599          addr_literal_entry->addr_type = input_program->addr_literals[i].type;
1600       }
1601 
1602       PVR_PDS_MODE_TOGGLE(code_section,
1603                           instruction,
1604                           pvr_encode_burst_cs(&entry_write_state,
1605                                               last_dma,
1606                                               halt,
1607                                               next_const32,
1608                                               next_const64,
1609                                               size_in_dwords,
1610                                               destination));
1611 
1612       next_const64++;
1613       next_const32++;
1614    }
1615 
1616    /* For each descriptor set perform a DOUTD. */
1617    for (unsigned int descriptor_index = 0;
1618         descriptor_index < input_program->descriptor_set_count;
1619         descriptor_index++) {
1620       struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
1621       struct pvr_pds_descriptor_set *descriptor_set =
1622          &input_program->descriptor_sets[descriptor_index];
1623 
1624       bool last_dma = (++running_dma_count == total_dma_count);
1625       bool halt = last_dma && !input_program->secondary_program_present;
1626 
1627       descriptor_set_entry =
1628          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1629                                               sizeof(*descriptor_set_entry));
1630       descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
1631       descriptor_set_entry->const_offset = next_const64 * 2;
1632       descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
1633       descriptor_set_entry->primary = descriptor_set->primary;
1634       descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
1635 
1636       PVR_PDS_MODE_TOGGLE(code_section,
1637                           instruction,
1638                           pvr_encode_burst_cs(&entry_write_state,
1639                                               last_dma,
1640                                               halt,
1641                                               next_const32,
1642                                               next_const64,
1643                                               descriptor_set->size_in_dwords,
1644                                               descriptor_set->destination));
1645 
1646       next_const64++;
1647       next_const32++;
1648    }
1649 
1650    for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1651       struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1652 
1653       bool last_dma = (++running_dma_count == total_dma_count);
1654       bool halt = last_dma && !input_program->secondary_program_present;
1655 
1656       switch (buffer->type) {
1657       case PVR_BUFFER_TYPE_PUSH_CONSTS: {
1658          struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1659 
1660          special_buffer_entry =
1661             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1662                                                  sizeof(*special_buffer_entry));
1663          special_buffer_entry->type =
1664             PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1665          special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
1666          special_buffer_entry->buffer_index = buffer->source_offset;
1667          break;
1668       }
1669       case PVR_BUFFER_TYPE_DYNAMIC: {
1670          struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1671 
1672          special_buffer_entry =
1673             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1674                                                  sizeof(*special_buffer_entry));
1675          special_buffer_entry->type =
1676             PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1677          special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
1678          special_buffer_entry->buffer_index = buffer->source_offset;
1679          break;
1680       }
1681       case PVR_BUFFER_TYPE_COMPILE_TIME: {
1682          struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1683 
1684          special_buffer_entry =
1685             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1686                                                  sizeof(*special_buffer_entry));
1687          special_buffer_entry->type =
1688             PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1689          special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_COMPILE_TIME;
1690          special_buffer_entry->buffer_index = compile_time_buffer_index++;
1691          break;
1692       }
1693       case PVR_BUFFER_TYPE_BUFFER_LENGTHS: {
1694          struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1695 
1696          special_buffer_entry =
1697             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1698                                                  sizeof(*special_buffer_entry));
1699          special_buffer_entry->type =
1700             PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1701          special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BUFFER_LENGTHS;
1702          break;
1703       }
1704       case PVR_BUFFER_TYPE_BLEND_CONSTS: {
1705          struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1706 
1707          special_buffer_entry =
1708             pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1709                                                  sizeof(*special_buffer_entry));
1710          special_buffer_entry->type =
1711             PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1712          special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
1713          special_buffer_entry->buffer_index =
1714             input_program->blend_constants_used_mask;
1715          break;
1716       }
1717       case PVR_BUFFER_TYPE_UBO: {
1718          struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
1719 
1720          constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1721             &entry_write_state,
1722             sizeof(*constant_buffer_entry));
1723          constant_buffer_entry->type =
1724             PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
1725          constant_buffer_entry->buffer_id = buffer->buffer_id;
1726          constant_buffer_entry->desc_set = buffer->desc_set;
1727          constant_buffer_entry->binding = buffer->binding;
1728          constant_buffer_entry->offset = buffer->source_offset;
1729          constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1730          break;
1731       }
1732       case PVR_BUFFER_TYPE_UBO_ZEROING: {
1733          struct pvr_const_map_entry_constant_buffer_zeroing
1734             *constant_buffer_entry;
1735 
1736          constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1737             &entry_write_state,
1738             sizeof(*constant_buffer_entry));
1739          constant_buffer_entry->type =
1740             PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
1741          constant_buffer_entry->buffer_id = buffer->buffer_id;
1742          constant_buffer_entry->offset = buffer->source_offset;
1743          constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1744          break;
1745       }
1746       }
1747 
1748       entry_write_state.entry->const_offset = next_const64 * 2;
1749 
1750       PVR_PDS_MODE_TOGGLE(code_section,
1751                           instruction,
1752                           pvr_encode_burst_cs(&entry_write_state,
1753                                               last_dma,
1754                                               halt,
1755                                               next_const32,
1756                                               next_const64,
1757                                               buffer->size_in_dwords,
1758                                               buffer->destination));
1759 
1760       next_const64++;
1761       next_const32++;
1762    }
1763 
1764    if (total_dma_count != running_dma_count)
1765       fprintf(stderr, "Mismatch in DMA count\n");
1766 
1767    if (input_program->secondary_program_present) {
1768       struct pvr_const_map_entry_doutu_address *doutu_address;
1769 
1770       PVR_PDS_MODE_TOGGLE(code_section,
1771                           instruction,
1772                           pvr_pds_encode_doutu(false, true, next_const64));
1773 
1774       doutu_address =
1775          pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1776                                               sizeof(*doutu_address));
1777       doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1778       doutu_address->const_offset = next_const64 * 2;
1779       doutu_address->doutu_control = input_program->secondary_task_control.src0;
1780 
1781       next_const64++;
1782    }
1783 
1784    if (instruction == 0 && input_program->must_not_be_empty) {
1785       PVR_PDS_MODE_TOGGLE(code_section,
1786                           instruction,
1787                           pvr_pds_inst_encode_halt(
1788                              /* cc */ false));
1789    }
1790 
1791    info->entry_count = entry_write_state.entry_count;
1792    info->entries_written_size_in_bytes =
1793       entry_write_state.entries_size_in_bytes;
1794    info->code_size_in_dwords = instruction;
1795 }
1796