xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pds/pvr_pds.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "util/log.h"
36 #include "util/macros.h"
37 
/* Extract the upper (H32) or lower (L32) 32 bits of a value as a uint32_t.
 *
 * The argument is widened to 64 bits before shifting so that H32() is well
 * defined (and yields 0) even when it is handed a 32-bit operand; shifting a
 * 32-bit value by 32 is undefined behaviour in C. The whole expansion is
 * parenthesised so the macros compose safely inside larger expressions.
 */
#define H32(X) ((uint32_t)((((uint64_t)(X)) >> 32U) & 0xFFFFFFFFU))
#define L32(X) ((uint32_t)(((uint64_t)(X)) & 0xFFFFFFFFU))
40 
41 /*****************************************************************************
42  Macro definitions
43 *****************************************************************************/
44 
45 #define PVR_PDS_DWORD_SHIFT 2
46 
47 #define PVR_PDS_CONSTANTS_BLOCK_BASE 0
48 #define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
49 #define PVR_PDS_TEMPS_BLOCK_BASE 128
50 #define PVR_PDS_TEMPS_BLOCK_SIZE 32
51 
52 #define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
53 #define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK
54 
/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
58 #define PVR_PDS_CDM_WORK_GROUP_ID_X 0
59 #define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
60 #define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
61 /* Local IDs are available in every task. */
62 #define PVR_PDS_CDM_LOCAL_ID_X 0
63 #define PVR_PDS_CDM_LOCAL_ID_YZ 1
64 
65 #define PVR_PDS_DOUTW_LOWER32 0x0
66 #define PVR_PDS_DOUTW_UPPER32 0x1
67 #define PVR_PDS_DOUTW_LOWER64 0x2
68 #define PVR_PDS_DOUTW_LOWER128 0x3
69 #define PVR_PDS_DOUTW_MAXMASK 0x4
70 
71 #define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
72 #define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
73 
74 /*****************************************************************************
75  Static variables
76 *****************************************************************************/
77 
78 static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
79    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
80    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
81    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
82    PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
83 };
84 
85 /* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
86  * cache_control_const[1].
87  */
88 static const uint32_t cache_control_const[2][2] = {
89    { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
90      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
91    { 0, 0 }
92 };
93 
94 /*****************************************************************************
95  Function definitions
96 *****************************************************************************/
97 
pvr_pds_encode_ld_src0(uint64_t dest,uint64_t count8,uint64_t src_add,bool cached,const struct pvr_device_info * dev_info)98 uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99                                 uint64_t count8,
100                                 uint64_t src_add,
101                                 bool cached,
102                                 const struct pvr_device_info *dev_info)
103 {
104    uint64_t encoded = 0;
105 
106    if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107       encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108                          : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109    }
110 
111    encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112                << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113    encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114                << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115    encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116                       : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117    encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118                << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119 
120    return encoded;
121 }
122 
pvr_pds_encode_st_src0(uint64_t src,uint64_t count4,uint64_t dst_add,bool write_through,const struct pvr_device_info * device_info)123 uint64_t pvr_pds_encode_st_src0(uint64_t src,
124                                 uint64_t count4,
125                                 uint64_t dst_add,
126                                 bool write_through,
127                                 const struct pvr_device_info *device_info)
128 {
129    uint64_t encoded = 0;
130 
131    if (device_info->features.has_slc_mcu_cache_controls) {
132       encoded |= (write_through
133                      ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134                      : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135    }
136 
137    encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138                << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139    encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140                << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141    encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142                              : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143    encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144                << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145 
146    return encoded;
147 }
148 
149 static ALWAYS_INLINE uint32_t
pvr_pds_encode_doutw_src1(uint32_t dest,uint32_t dword_mask,uint32_t flags,bool cached,const struct pvr_device_info * dev_info)150 pvr_pds_encode_doutw_src1(uint32_t dest,
151                           uint32_t dword_mask,
152                           uint32_t flags,
153                           bool cached,
154                           const struct pvr_device_info *dev_info)
155 {
156    assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157           ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158           (dword_mask < PVR_PDS_DOUTW_LOWER64));
159 
160    uint32_t encoded =
161       (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162 
163    encoded |= dword_mask_const[dword_mask];
164 
165    encoded |= flags;
166 
167    encoded |=
168       cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169                                                                             : 1]
170                          [cached ? 1 : 0];
171    return encoded;
172 }
173 
pvr_pds_encode_doutw64(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)174 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175                                                      uint32_t end,
176                                                      uint32_t src1,
177                                                      uint32_t src0)
178 {
179    return pvr_pds_inst_encode_dout(cc,
180                                    end,
181                                    src1,
182                                    src0,
183                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184 }
185 
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)186 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187                                                    uint32_t end,
188                                                    uint32_t src0)
189 {
190    return pvr_pds_inst_encode_dout(cc,
191                                    end,
192                                    0,
193                                    src0,
194                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195 }
196 
pvr_pds_inst_encode_doutc(uint32_t cc,uint32_t end)197 static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198                                                         uint32_t end)
199 {
200    return pvr_pds_inst_encode_dout(cc,
201                                    end,
202                                    0,
203                                    0,
204                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205 }
206 
pvr_pds_encode_doutd(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)207 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208                                                    uint32_t end,
209                                                    uint32_t src1,
210                                                    uint32_t src0)
211 {
212    return pvr_pds_inst_encode_dout(cc,
213                                    end,
214                                    src1,
215                                    src0,
216                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218 
pvr_pds_encode_douti(uint32_t cc,uint32_t end,uint32_t src0)219 static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220                                                    uint32_t end,
221                                                    uint32_t src0)
222 {
223    return pvr_pds_inst_encode_dout(cc,
224                                    end,
225                                    0,
226                                    src0,
227                                    PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228 }
229 
pvr_pds_encode_bra(uint32_t srcc,uint32_t neg,uint32_t setc,int32_t relative_address)230 static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231                                                  uint32_t neg,
232                                                  uint32_t setc,
233                                                  int32_t relative_address)
234 {
235    /* Address should be signed but API only allows unsigned value. */
236    return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237 }
238 
239 /**
240  * Gets the next constant address and moves the next constant pointer along.
241  *
242  * \param next_constant Pointer to the next constant address.
243  * \param num_constants The number of constants required.
244  * \param count The number of constants allocated.
245  * \return The address of the next constant.
246  */
pvr_pds_get_constants(uint32_t * next_constant,uint32_t num_constants,uint32_t * count)247 static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248                                       uint32_t num_constants,
249                                       uint32_t *count)
250 {
251    uint32_t constant;
252 
253    /* Work out starting constant number. For even number of constants, start on
254     * a 64-bit boundary.
255     */
256    if (num_constants & 1)
257       constant = *next_constant;
258    else
259       constant = (*next_constant + 1) & ~1;
260 
261    /* Update the count with the number of constants actually allocated. */
262    *count += constant + num_constants - *next_constant;
263 
264    /* Move the next constant pointer. */
265    *next_constant = constant + num_constants;
266 
267    assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268 
269    return constant;
270 }
271 
272 /**
273  * Gets the next temp address and moves the next temp pointer along.
274  *
275  * \param next_temp Pointer to the next temp address.
276  * \param num_temps The number of temps required.
277  * \param count The number of temps allocated.
278  * \return The address of the next temp.
279  */
280 static uint32_t
pvr_pds_get_temps(uint32_t * next_temp,uint32_t num_temps,uint32_t * count)281 pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282 {
283    uint32_t temp;
284 
285    /* Work out starting temp number. For even number of temps, start on a
286     * 64-bit boundary.
287     */
288    if (num_temps & 1)
289       temp = *next_temp;
290    else
291       temp = (*next_temp + 1) & ~1;
292 
293    /* Update the count with the number of temps actually allocated. */
294    *count += temp + num_temps - *next_temp;
295 
296    /* Move the next temp pointer. */
297    *next_temp = temp + num_temps;
298 
299    assert((temp + num_temps) <=
300           (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301 
302    return temp;
303 }
304 
305 /**
306  * Write a 32-bit constant indexed by the long range.
307  *
308  * \param data_block Pointer to data block to write to.
309  * \param index Index within the data to write to.
310  * \param dword The 32-bit constant to write.
311  */
312 static void
pvr_pds_write_constant32(uint32_t * data_block,uint32_t index,uint32_t dword0)313 pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314 {
315    /* Check range. */
316    assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317                     PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318 
319    data_block[index + 0] = dword0;
320 
321    PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322 }
323 
324 /**
325  * Write a 64-bit constant indexed by the long range.
326  *
327  * \param data_block Pointer to data block to write to.
328  * \param index Index within the data to write to.
329  * \param dword0 Lower half of the 64 bit constant.
330  * \param dword1 Upper half of the 64 bit constant.
331  */
pvr_pds_write_constant64(uint32_t * data_block,uint32_t index,uint32_t dword0,uint32_t dword1)332 static void pvr_pds_write_constant64(uint32_t *data_block,
333                                      uint32_t index,
334                                      uint32_t dword0,
335                                      uint32_t dword1)
336 {
337    /* Has to be on 64 bit boundary. */
338    assert((index & 1) == 0);
339 
340    /* Check range. */
341    assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342                            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343 
344    data_block[index + 0] = dword0;
345    data_block[index + 1] = dword1;
346 
347    PVR_PDS_PRINT_DATA("WriteConstant64",
348                       ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349                       index);
350 }
351 
352 /**
353  * Write a 64-bit constant from a single wide word indexed by the long-range
354  * number.
355  *
356  * \param data_block Pointer to data block to write to.
357  * \param index Index within the data to write to.
358  * \param word The 64-bit constant to write.
359  */
360 
361 static void
pvr_pds_write_wide_constant(uint32_t * data_block,uint32_t index,uint64_t word)362 pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363 {
364    /* Has to be on 64 bit boundary. */
365    assert((index & 1) == 0);
366 
367    /* Check range. */
368    assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369                            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370 
371    data_block[index + 0] = L32(word);
372    data_block[index + 1] = H32(word);
373 
374    PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375 }
376 
pvr_pds_write_dma_address(uint32_t * data_block,uint32_t index,uint64_t address,bool coherent,const struct pvr_device_info * dev_info)377 static void pvr_pds_write_dma_address(uint32_t *data_block,
378                                       uint32_t index,
379                                       uint64_t address,
380                                       bool coherent,
381                                       const struct pvr_device_info *dev_info)
382 {
383    /* Has to be on 64 bit boundary. */
384    assert((index & 1) == 0);
385 
386    if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387       address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388 
389    /* Check range. */
390    assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391                            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392 
393    data_block[index + 0] = L32(address);
394    data_block[index + 1] = H32(address);
395 
396    PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397 }
398 
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * The current data size is used as the allocation cursor, so the new constant
 * is placed at the first 64-bit aligned slot at or after it.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size In/out number of constants allocated; grown by the two
 *                  dwords written plus any alignment padding.
 * \returns The dword index at which the constant was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* Allocation resumes from the end of what has been allocated so far. */
   uint32_t cursor = *data_size;
   const uint32_t slot = pvr_pds_get_constants(&cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421 
pvr_pds_pixel_shader_sa_initialize(struct pvr_pds_pixel_shader_sa_program * program)422 void pvr_pds_pixel_shader_sa_initialize(
423    struct pvr_pds_pixel_shader_sa_program *program)
424 {
425    memset(program, 0, sizeof(*program));
426 }
427 
428 /**
429  * Encode a DMA burst.
430  *
431  * \param dma_control DMA control words.
432  * \param dma_address DMA address.
433  * \param dest_offset Destination offset in the attribute.
434  * \param dma_size The size of the DMA in words.
435  * \param src_address Source address for the burst.
436  * \param last Last DMA in program.
437  * \param dev_info PVR device info structure.
438  * \returns The number of DMA transfers required.
439  */
pvr_pds_encode_dma_burst(uint32_t * dma_control,uint64_t * dma_address,uint32_t dest_offset,uint32_t dma_size,uint64_t src_address,bool last,const struct pvr_device_info * dev_info)440 uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441                                   uint64_t *dma_address,
442                                   uint32_t dest_offset,
443                                   uint32_t dma_size,
444                                   uint64_t src_address,
445                                   bool last,
446                                   const struct pvr_device_info *dev_info)
447 {
448    dma_control[0] = dma_size
449                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
450    dma_control[0] |= dest_offset
451                      << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
452 
453    dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
454                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
455 
456    if (last)
457       dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
458 
459    dma_address[0] = src_address;
460    if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
461       dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
462 
463    /* Force to 1 DMA. */
464    return 1;
465 }
466 
467 /* FIXME: use the csbgen interface and pvr_csb_pack.
468  * FIXME: use bool for phase_rate_change.
469  */
470 /**
471  * Sets up the USC control words for a DOUTU.
472  *
473  * \param usc_task_control USC task control structure to be setup.
474  * \param execution_address USC execution virtual address.
475  * \param usc_temps Number of USC temps.
476  * \param sample_rate Sample rate for the DOUTU.
477  * \param phase_rate_change Phase rate change for the DOUTU.
478  */
pvr_pds_setup_doutu(struct pvr_pds_usc_task_control * usc_task_control,uint64_t execution_address,uint32_t usc_temps,uint32_t sample_rate,bool phase_rate_change)479 void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
480                          uint64_t execution_address,
481                          uint32_t usc_temps,
482                          uint32_t sample_rate,
483                          bool phase_rate_change)
484 {
485    usc_task_control->src0 = UINT64_C(0);
486 
487    /* Set the execution address. */
488    pvr_set_usc_execution_address64(&(usc_task_control->src0),
489                                    execution_address);
490 
491    if (usc_temps > 0) {
492       /* Temps are allocated in blocks of 4 dwords. */
493       usc_temps =
494          DIV_ROUND_UP(usc_temps,
495                       PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
496 
497       /* Check for losing temps due to too many requested. */
498       assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
499              usc_temps);
500 
501       usc_task_control->src0 |=
502          ((uint64_t)(usc_temps &
503                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
504          << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
505    }
506 
507    if (sample_rate > 0) {
508       usc_task_control->src0 |=
509          ((uint64_t)sample_rate)
510          << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
511    }
512 
513    if (phase_rate_change) {
514       usc_task_control->src0 |=
515          PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
516    }
517 }
518 
519 /**
520  * Generates the PDS pixel event program.
521  *
522  * \param program Pointer to the PDS pixel event program.
523  * \param buffer Pointer to the buffer for the program.
524  * \param gen_mode Generate either a data segment or code segment.
525  * \param dev_info PVR device info structure.
526  * \returns Pointer to just beyond the buffer for the program.
527  */
528 uint32_t *
pvr_pds_generate_pixel_event(struct pvr_pds_event_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)529 pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
530                              uint32_t *restrict buffer,
531                              enum pvr_pds_generate_mode gen_mode,
532                              const struct pvr_device_info *dev_info)
533 {
534    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
535    uint32_t *constants = buffer;
536 
537    uint32_t data_size = 0;
538 
539    /* Copy the DMA control words and USC task control words to constants, then
540     * arrange them so that the 64-bit words are together followed by the 32-bit
541     * words.
542     */
543    uint32_t control_constant =
544       pvr_pds_get_constants(&next_constant, 2, &data_size);
545    uint32_t emit_constant =
546       pvr_pds_get_constants(&next_constant,
547                             (2 * program->num_emit_word_pairs),
548                             &data_size);
549 
550    uint32_t control_word_constant =
551       pvr_pds_get_constants(&next_constant,
552                             program->num_emit_word_pairs,
553                             &data_size);
554 
555    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
556       /* Src0 for DOUTU. */
557       pvr_pds_write_wide_constant(buffer,
558                                   control_constant,
559                                   program->task_control.src0); /* DOUTU */
560       /* 64-bit Src0. */
561 
562       /* Emit words for end of tile program. */
563       for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
564          pvr_pds_write_constant64(constants,
565                                   emit_constant + (2 * i),
566                                   program->emit_words[(2 * i) + 0],
567                                   program->emit_words[(2 * i) + 1]);
568       }
569 
570       /* Control words. */
571       for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
572          uint32_t doutw = pvr_pds_encode_doutw_src1(
573             (2 * i),
574             PVR_PDS_DOUTW_LOWER64,
575             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
576             false,
577             dev_info);
578 
579          if (i == (program->num_emit_word_pairs - 1))
580             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
581 
582          pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
583       }
584    }
585 
586    else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
587       /* DOUTW the state into the shared register. */
588       for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
589          *buffer++ = pvr_pds_encode_doutw64(
590             /* cc */ 0,
591             /* END */ 0,
592             /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
593             /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
594                                                          */
595       }
596 
597       /* Kick the USC. */
598       *buffer++ = pvr_pds_encode_doutu(
599          /* cc */ 0,
600          /* END */ 1,
601          /* SRC0 */ control_constant >> 1);
602    }
603 
604    uint32_t code_size = 1 + program->num_emit_word_pairs;
605 
606    /* Save the data segment Pointer and size. */
607    program->data_segment = constants;
608    program->data_size = data_size;
609    program->code_size = code_size;
610 
611    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
612       return (constants + next_constant);
613 
614    if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
615       return buffer;
616 
617    return NULL;
618 }
619 
620 /**
621  * Checks if any of the vertex streams contains instance data.
622  *
623  * \param streams Streams contained in the vertex shader.
624  * \param num_streams Number of vertex streams.
625  * \returns true if one or more of the given vertex streams contains
626  *          instance data, otherwise false.
627  */
pvr_pds_vertex_streams_contains_instance_data(const struct pvr_pds_vertex_stream * streams,uint32_t num_streams)628 static bool pvr_pds_vertex_streams_contains_instance_data(
629    const struct pvr_pds_vertex_stream *streams,
630    uint32_t num_streams)
631 {
632    for (uint32_t i = 0; i < num_streams; i++) {
633       const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
634       if (vertex_stream->instance_data)
635          return true;
636    }
637 
638    return false;
639 }
640 
/**
 * Allocates one or two constants for the PDS vertex shader, where the
 * constants are divided into banks of 8 dwords.
 *
 * Once the cursor is at or beyond num_backs * 8 the request is handed to
 * pvr_pds_get_constants(). Before that point the cursor is stepped so that
 * successive allocations land at a stride of 8, and count is only updated
 * when a two-constant allocation lands at or past the banked region.
 *
 * \param num_backs Number of banks (NOTE(review): presumably a typo for
 *                  "num_banks" - confirm against callers).
 * \param next_constant In/out allocation cursor.
 * \param num_constants The number of constants required; must be 1 or 2.
 * \param count Running total of constants allocated.
 * \return The index of the allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   const uint32_t banked_limit = num_backs << 3;
   const uint32_t cursor = *next_constant;
   const bool single = (num_constants == 1);

   assert(num_constants == 1 || num_constants == 2);

   /* Past the banked region: fall back to plain linear allocation. */
   if (cursor >= banked_limit)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   /* Cursor sits at the start of a bank row. */
   if ((cursor % 8) == 0) {
      *next_constant = cursor + (single ? 1 : 8);
      return cursor;
   }

   /* Mid-row single constant: take this slot and move on by 7. */
   if (single) {
      *next_constant = cursor + 7;
      return cursor;
   }

   /* Mid-row pair: the allocation itself goes 7 slots further on. */
   const uint32_t constant = cursor + 7;

   if (constant >= banked_limit) {
      *next_constant = constant + 2;
      *count += 2;
   } else {
      *next_constant = constant + 8;
   }

   return constant;
}
679 
680 /**
681  * Generates a PDS program to load USC vertex inputs based from one or more
682  * vertex buffers, each containing potentially multiple elements, and then a
683  * DOUTU to execute the USC.
684  *
685  * \param program Pointer to the description of the program which should be
686  *                generated.
687  * \param buffer Pointer to buffer that receives the output of this function.
688  *               Will either be the data segment or code segment depending on
689  *               gen_mode.
690  * \param gen_mode Which part to generate, either data segment or
691  *                 code segment. If PDS_GENERATE_SIZES is specified, nothing is
692  *                 written, but size information in program is updated.
693  * \param dev_info PVR device info structure.
694  * \returns Pointer to just beyond the buffer for the data - i.e the value
695  *          of the buffer after writing its contents.
696  */
697 /* FIXME: Implement PDS_GENERATE_CODEDATA_SEGMENTS? */
698 uint32_t *
pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)699 pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700                       uint32_t *restrict buffer,
701                       enum pvr_pds_generate_mode gen_mode,
702                       const struct pvr_device_info *dev_info)
703 {
704    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705    uint32_t next_stream_constant;
706    uint32_t next_temp;
707    uint32_t usc_control_constant64;
708    uint32_t stride_constant32 = 0;
709    uint32_t dma_address_constant64 = 0;
710    uint32_t dma_control_constant64;
711    uint32_t multiplier_constant32 = 0;
712    uint32_t base_instance_const32 = 0;
713 
714    uint32_t temp = 0;
715    uint32_t index_temp64 = 0;
716    uint32_t num_vertices_temp64 = 0;
717    uint32_t pre_index_temp = (uint32_t)(-1);
718    bool first_ddmadt = true;
719    uint32_t input_register0;
720    uint32_t input_register1;
721    uint32_t input_register2;
722 
723    struct pvr_pds_vertex_stream *vertex_stream;
724    struct pvr_pds_vertex_element *vertex_element;
725    uint32_t shift_2s_comp;
726 
727    uint32_t data_size = 0;
728    uint32_t code_size = 0;
729    uint32_t temps_used = 0;
730 
731    bool direct_writes_needed = false;
732 
733    uint32_t consts_size = 0;
734    uint32_t vertex_id_control_word_const32 = 0;
735    uint32_t instance_id_control_word_const32 = 0;
736    uint32_t instance_id_modifier_word_const32 = 0;
737    uint32_t geometry_id_control_word_const64 = 0;
738    uint32_t empty_dma_control_constant64 = 0;
739 
740    bool any_instanced_stream =
741       pvr_pds_vertex_streams_contains_instance_data(program->streams,
742                                                     program->num_streams);
743 
744    uint32_t base_instance_register = 0;
745    uint32_t ddmadt_enables = 0;
746 
747    bool issue_empty_ddmad = false;
748    uint32_t last_stream_index = program->num_streams - 1;
749    bool current_p0 = false;
750    uint32_t skip_stream_flag = 0;
751 
752    /* Generate the PDS vertex shader data. */
753 
754 #if MESA_DEBUG
755    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756       for (uint32_t i = 0; i < program->data_size; i++)
757          buffer[i] = 0xDEADBEEF;
758    }
759 #endif
760 
761    /* Generate the PDS vertex shader program */
762    next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763    /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764    input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765    /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766    input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767 
768    if (program->iterate_remap_id)
769       input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770    else
771       input_register2 = 0; /* Not used, but need to silence the compiler. */
772 
773    /* Generate the PDS vertex shader code. The constants in the data block are
774     * arranged as follows:
775     *
776     * 64 bit bank 0     64 bit bank 1          64 bit bank 2    64 bit bank 3
777     * Not used (tmps)   Stride | Multiplier    Address          Control
778     */
779 
780    /* Find out how many constants are needed by streams. */
781    for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782       pvr_pds_get_constants(&next_constant,
783                             8 * program->streams[stream].num_elements,
784                             &consts_size);
785    }
786 
787    /* If there are no vertex streams allocate the first bank for USC Code
788     * Address.
789     */
790    if (consts_size == 0)
791       pvr_pds_get_constants(&next_constant, 2, &consts_size);
792    else
793       next_constant = 8;
794 
795    direct_writes_needed = program->iterate_instance_id ||
796                           program->iterate_vtx_id || program->iterate_remap_id;
797 
798    if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799       /* Evaluate what config of DDMAD should be used for each stream. */
800       for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801          vertex_stream = &program->streams[stream];
802 
803          if (vertex_stream->use_ddmadt) {
804             ddmadt_enables |= (1 << stream);
805 
806             /* The condition for index value is:
807              * index * stride + size <= bufferSize (all in unit of byte)
808              */
809             if (vertex_stream->stride == 0) {
810                if (vertex_stream->elements[0].size <=
811                    vertex_stream->buffer_size_in_bytes) {
812                   /* index can be any value -> no need to use DDMADT. */
813                   ddmadt_enables &= (~(1 << stream));
814                } else {
815                   /* No index works -> no need to issue DDMAD instruction.
816                    */
817                   skip_stream_flag |= (1 << stream);
818                }
819             } else {
820                /* index * stride + size <= bufferSize
821                 *
822                 * can be converted to:
823                 * index <= (bufferSize - size) / stride
824                 *
825                 * where maximum index is:
826                 * integer((bufferSize - size) / stride).
827                 */
828                if (vertex_stream->buffer_size_in_bytes <
829                    vertex_stream->elements[0].size) {
830                   /* No index works -> no need to issue DDMAD instruction.
831                    */
832                   skip_stream_flag |= (1 << stream);
833                } else {
834                   uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835                                         vertex_stream->elements[0].size) /
836                                        vertex_stream->stride;
837                   if (max_index == 0xFFFFFFFFu) {
838                      /* No need to use DDMADT as all possible indices can
839                       * pass the test.
840                       */
841                      ddmadt_enables &= (~(1 << stream));
842                   } else {
843                      /* In this case, test condition can be changed to
844                       * index < max_index + 1.
845                       */
846                      program->streams[stream].num_vertices =
847                         pvr_pds_get_bank_based_constants(program->num_streams,
848                                                          &next_constant,
849                                                          1,
850                                                          &consts_size);
851 
852                      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853                         pvr_pds_write_constant32(
854                            buffer,
855                            program->streams[stream].num_vertices,
856                            max_index + 1);
857                      }
858                   }
859                }
860             }
861          }
862 
863          if ((skip_stream_flag & (1 << stream)) == 0) {
864             issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865             last_stream_index = stream;
866          }
867       }
868    } else {
869       if (program->num_streams > 0 &&
870           program->streams[program->num_streams - 1].use_ddmadt) {
871          issue_empty_ddmad = true;
872       }
873    }
874 
875    if (direct_writes_needed)
876       issue_empty_ddmad = false;
877 
878    if (issue_empty_ddmad) {
879       /* An empty DMA control const (DMA size = 0) is required in case the
880        * last DDMAD is predicated out and its last flag has no effect.
881        */
882       empty_dma_control_constant64 =
883          pvr_pds_get_bank_based_constants(program->num_streams,
884                                           &next_constant,
885                                           2,
886                                           &consts_size);
887    }
888 
889    /* Assign constants for the non-stream inputs (vertex ID, instance ID,
890     * remap ID) and for the base instance if there is any instanced stream.
891     */
892    if (direct_writes_needed || any_instanced_stream ||
893        program->instance_id_modifier) {
894       if (program->iterate_vtx_id) {
895          vertex_id_control_word_const32 =
896             pvr_pds_get_bank_based_constants(program->num_streams,
897                                              &next_constant,
898                                              1,
899                                              &consts_size);
900       }
901 
902       if (program->iterate_instance_id || program->instance_id_modifier) {
903          if (program->instance_id_modifier == 0) {
904             instance_id_control_word_const32 =
905                pvr_pds_get_bank_based_constants(program->num_streams,
906                                                 &next_constant,
907                                                 1,
908                                                 &consts_size);
909          } else {
910             instance_id_modifier_word_const32 =
911                pvr_pds_get_bank_based_constants(program->num_streams,
912                                                 &next_constant,
913                                                 1,
914                                                 &consts_size);
915             if ((instance_id_modifier_word_const32 % 2) == 0) {
916                instance_id_control_word_const32 =
917                   pvr_pds_get_bank_based_constants(program->num_streams,
918                                                    &next_constant,
919                                                    1,
920                                                    &consts_size);
921             } else {
922                instance_id_control_word_const32 =
923                   instance_id_modifier_word_const32;
924                instance_id_modifier_word_const32 =
925                   pvr_pds_get_bank_based_constants(program->num_streams,
926                                                    &next_constant,
927                                                    1,
928                                                    &consts_size);
929             }
930          }
931       }
932 
933       if (program->base_instance != 0) {
934          base_instance_const32 =
935             pvr_pds_get_bank_based_constants(program->num_streams,
936                                              &next_constant,
937                                              1,
938                                              &consts_size);
939       }
940 
941       if (program->iterate_remap_id) {
942          geometry_id_control_word_const64 =
943             pvr_pds_get_bank_based_constants(program->num_streams,
944                                              &next_constant,
945                                              2,
946                                              &consts_size);
947       }
948    }
949 
950    if (program->instance_id_modifier != 0) {
951       /* This instanceID modifier is used when a draw array instanced call
952        * sourcing from client data cannot fit into vertex buffer and needs to
953        * be broken down into several draw calls.
954        */
955 
956       code_size += 1;
957 
958       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959          pvr_pds_write_constant32(buffer,
960                                   instance_id_modifier_word_const32,
961                                   program->instance_id_modifier);
962       } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963          *buffer++ = pvr_pds_inst_encode_add32(
964             /* cc */ 0x0,
965             /* ALUM */ 0, /* Unsigned */
966             /* SNA */ 0, /* Add */
967             /* SRC0 32b */ instance_id_modifier_word_const32,
968             /* SRC1 32b */ input_register1,
969             /* DST 32b */ input_register1);
970       }
971    }
972 
973    /* Adjust instanceID if necessary. */
974    if (any_instanced_stream || program->iterate_instance_id) {
975       if (program->base_instance != 0) {
976          assert(!program->draw_indirect);
977 
978          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979             pvr_pds_write_constant32(buffer,
980                                      base_instance_const32,
981                                      program->base_instance);
982          }
983 
984          base_instance_register = base_instance_const32;
985       }
986 
987       if (program->draw_indirect) {
988          assert((program->instance_id_modifier == 0) &&
989                 (program->base_instance == 0));
990 
991          base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992       }
993    }
994 
995    next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996    usc_control_constant64 =
997       pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998 
999    for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000       bool instance_data_with_base_instance;
1001 
1002       if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003           ((skip_stream_flag & (1 << stream)) != 0)) {
1004          continue;
1005       }
1006 
1007       vertex_stream = &program->streams[stream];
1008 
1009       instance_data_with_base_instance =
1010          ((vertex_stream->instance_data) &&
1011           ((program->base_instance > 0) || (program->draw_indirect)));
1012 
1013       /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014        * USC constants.
1015        */
1016       if (stream == 0) {
1017          stride_constant32 =
1018             pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019       } else {
1020          next_constant =
1021             pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022 
1023          /* Skip bank 0. */
1024          stride_constant32 = next_constant + 2;
1025       }
1026 
1027       multiplier_constant32 = stride_constant32 + 1;
1028 
1029       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030          pvr_pds_write_constant32(buffer,
1031                                   stride_constant32,
1032                                   vertex_stream->stride);
1033 
1034          /* Vertex stream frequency multiplier. */
1035          if (vertex_stream->multiplier)
1036             pvr_pds_write_constant32(buffer,
1037                                      multiplier_constant32,
1038                                      vertex_stream->multiplier);
1039       }
1040 
1041       /* Update the code size count and temps count for the above code
1042        * segment.
1043        */
1044       if (vertex_stream->current_state) {
1045          code_size += 1;
1046          temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047       } else {
1048          unsigned int num_temps_required = 0;
1049 
1050          if (vertex_stream->multiplier) {
1051             num_temps_required += 2;
1052             code_size += 3;
1053 
1054             if (vertex_stream->shift) {
1055                code_size += 1;
1056 
1057                if ((int32_t)vertex_stream->shift > 0)
1058                   code_size += 1;
1059             }
1060          } else if (vertex_stream->shift) {
1061             code_size += 1;
1062             num_temps_required += 1;
1063          } else if (instance_data_with_base_instance) {
1064             num_temps_required += 1;
1065          }
1066 
1067          if (num_temps_required != 0) {
1068             temp = pvr_pds_get_temps(&next_temp,
1069                                      num_temps_required,
1070                                      &temps_used); /* 64-bit */
1071          } else {
1072             temp = vertex_stream->instance_data ? input_register1
1073                                                 : input_register0;
1074          }
1075 
1076          if (instance_data_with_base_instance)
1077             code_size += 1;
1078       }
1079 
1080       /* The real code segment. */
1081       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082          /* If it's current state stream, then index = 0 always. */
1083          if (vertex_stream->current_state) {
1084             /* Put zero in temp. */
1085             *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086          } else if (vertex_stream->multiplier) {
1087             /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088              * new: Iout = (Iin * Multiplier) >> (shift+31)
1089              */
1090 
1091             /* Put zero in temp. Need zero for add part of the following
1092              * MAD. MAD source is 64 bit, so need two LIMMs.
1093              */
1094             *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095             /* Put zero in temp. Need zero for add part of the following
1096              * MAD.
1097              */
1098             *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099 
1100             /* old: (Iin * (Multiplier+2^24))
1101              * new: (Iin * Multiplier)
1102              */
1103             *buffer++ = pvr_rogue_inst_encode_mad(
1104                0, /* Sign of add is positive. */
1105                0, /* Unsigned ALU mode */
1106                0, /* Unconditional */
1107                multiplier_constant32,
1108                vertex_stream->instance_data ? input_register1 : input_register0,
1109                temp / 2,
1110                temp / 2);
1111 
1112             if (vertex_stream->shift) {
1113                int32_t shift = (int32_t)vertex_stream->shift;
1114 
1115                /* new: >> (shift + 31) */
1116                shift += 31;
1117                shift *= -1;
1118 
1119                if (shift < -31) {
1120                   /* >> (31) */
1121                   shift_2s_comp = 0xFFFE1;
1122                   *buffer++ = pvr_pds_inst_encode_sftlp64(
1123                      /* cc */ 0,
1124                      /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125                      /* IM */ 1, /*  enable immediate */
1126                      /* SRC0 */ temp / 2,
1127                      /* SRC1 */ input_register0, /* This won't be used in
1128                                                   * a shift operation.
1129                                                   */
1130                      /* SRC2 (Shift) */ shift_2s_comp,
1131                      /* DST */ temp / 2);
1132                   shift += 31;
1133                }
1134 
1135                /* old: >> (Shift+24)
1136                 * new: >> (shift + 31)
1137                 */
1138                shift_2s_comp = *((uint32_t *)&shift);
1139                *buffer++ = pvr_pds_inst_encode_sftlp64(
1140                   /* cc */ 0,
1141                   /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142                   /* IM */ 1, /*enable immediate */
1143                   /* SRC0 */ temp / 2,
1144                   /* SRC1 */ input_register0, /* This won't be used in
1145                                                * a shift operation.
1146                                                */
1147                   /* SRC2 (Shift) */ shift_2s_comp,
1148                   /* DST */ temp / 2);
1149             }
1150 
1151             if (instance_data_with_base_instance) {
1152                *buffer++ =
1153                   pvr_pds_inst_encode_add32(0, /* cc */
1154                                             0, /* ALNUM */
1155                                             0, /* SNA */
1156                                             base_instance_register, /* src0
1157                                                                      */
1158                                             temp, /* src1 */
1159                                             temp /* dst */
1160                   );
1161             }
1162          } else { /* NOT vertex_stream->multiplier */
1163             if (vertex_stream->shift) {
1164                /* Shift Index/InstanceNum Right by shift bits. Put result
1165                 * in a Temp.
1166                 */
1167 
1168                /* 2's complement of shift as this will be a right shift. */
1169                shift_2s_comp = ~(vertex_stream->shift) + 1;
1170 
1171                *buffer++ = pvr_pds_inst_encode_sftlp32(
1172                   /* IM */ 1, /*  enable immediate. */
1173                   /* cc */ 0,
1174                   /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175                   /* SRC0 */ vertex_stream->instance_data ? input_register1
1176                                                           : input_register0,
1177                   /* SRC1 */ input_register0, /* This won't be used in
1178                                                * a shift operation.
1179                                                */
1180                   /* SRC2 (Shift) */ shift_2s_comp,
1181                   /* DST */ temp);
1182 
1183                if (instance_data_with_base_instance) {
1184                   *buffer++ =
1185                      pvr_pds_inst_encode_add32(0, /* cc */
1186                                                0, /* ALNUM */
1187                                                0, /* SNA */
1188                                                base_instance_register, /* src0
1189                                                                         */
1190                                                temp, /* src1 */
1191                                                temp /* dst */
1192                      );
1193                }
1194             } else {
1195                if (instance_data_with_base_instance) {
1196                   *buffer++ =
1197                      pvr_pds_inst_encode_add32(0, /* cc */
1198                                                0, /* ALNUM */
1199                                                0, /* SNA */
1200                                                base_instance_register, /* src0
1201                                                                         */
1202                                                input_register1, /* src1 */
1203                                                temp /* dst */
1204                      );
1205                } else {
1206                   /* If the shift instruction doesn't happen, use the IR
1207                    * directly into the following MAD.
1208                    */
1209                   temp = vertex_stream->instance_data ? input_register1
1210                                                       : input_register0;
1211                }
1212             }
1213          }
1214       }
1215 
1216       if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217          if (vertex_stream->use_ddmadt)
1218             ddmadt_enables |= (1 << stream);
1219       } else {
1220          if ((ddmadt_enables & (1 << stream)) != 0) {
1221             /* Emulate what DDMADT does for range checking. */
1222             if (first_ddmadt) {
1223                /* Get a 64-bit temp so that the cmp of the current index
1224                 * against the allowed number of vertices can work.
1225                 */
1226                index_temp64 =
1227                   pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228                                                                   */
1229                num_vertices_temp64 =
1230                   pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231                                                                   */
1232 
1233                index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234                num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235 
1236                code_size += 3;
1237                current_p0 = true;
1238             }
1239 
1240             code_size += (temp == pre_index_temp ? 1 : 2);
1241 
1242             if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243                if (first_ddmadt) {
1244                   /* Set predicate to be P0. */
1245                   *buffer++ = pvr_pds_encode_bra(
1246                      PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247                                                         */
1248                      0, /* Neg */
1249                      PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250                                                       */
1251                      1); /* Addr */
1252 
1253                   *buffer++ =
1254                      pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255                   *buffer++ =
1256                      pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257                }
1258 
1259                if (temp != pre_index_temp) {
1260                   *buffer++ = pvr_pds_inst_encode_sftlp32(
1261                      /* IM */ 1, /*  enable immediate. */
1262                      /* cc */ 0,
1263                      /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264                      /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265                      /* SRC1 */ 0,
1266                      /* SRC2 (Shift) */ 0,
1267                      /* DST */ index_temp64);
1268                }
1269 
1270                *buffer++ = pvr_pds_inst_encode_sftlp32(
1271                   /* IM */ 1, /*  enable immediate. */
1272                   /* cc */ 0,
1273                   /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274                   /* SRC0 */ num_vertices_temp64 + 1,
1275                   /* SRC1 */ vertex_stream->num_vertices,
1276                   /* SRC2 (Shift) */ 0,
1277                   /* DST */ num_vertices_temp64);
1278             }
1279 
1280             first_ddmadt = false;
1281 
1282             pre_index_temp = temp;
1283          }
1284       }
1285 
1286       /* Process the elements in the stream. */
1287       for (uint32_t element = 0; element < vertex_stream->num_elements;
1288            element++) {
1289          bool terminate = false;
1290 
1291          vertex_element = &vertex_stream->elements[element];
1292          /* Check if last DDMAD needs terminate or not. */
1293          if ((element == (vertex_stream->num_elements - 1)) &&
1294              (stream == last_stream_index)) {
1295             terminate = !issue_empty_ddmad && !direct_writes_needed;
1296          }
1297 
1298          /* Get a new set of constants for this element. */
1299          if (element) {
1300             /* Get all 8 32 bit constants at once. */
1301             next_constant =
1302                pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303          }
1304 
1305          dma_address_constant64 = next_constant + 4;
1306          dma_control_constant64 = dma_address_constant64 + 2;
1307 
1308          if (vertex_element->component_size == 0) {
1309             /* Standard DMA.
1310              *
1311              * Write the DMA transfer control words into the PDS data
1312              * section.
1313              *
1314              * DMA Address is 40-bit.
1315              */
1316 
1317             if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318                uint32_t dma_control_word;
1319                uint64_t dma_control_word64 = 0;
1320                uint32_t dma_size;
1321 
1322                /* Write the address to the constant. */
1323                pvr_pds_write_dma_address(buffer,
1324                                          dma_address_constant64,
1325                                          vertex_stream->address +
1326                                             (uint64_t)vertex_element->offset,
1327                                          false,
1328                                          dev_info);
1329                {
1330                   if (program->stream_patch_offsets) {
1331                      program
1332                         ->stream_patch_offsets[program->num_stream_patches++] =
1333                         (stream << 16) | (dma_address_constant64 >> 1);
1334                   }
1335                }
1336 
1337                /* Size is in bytes - round up to nearest 32 bit word. */
1338                dma_size =
1339                   (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340                   PVR_PDS_DWORD_SHIFT;
1341 
1342                assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343 
1344                /* Set up the dma transfer control word. */
1345                dma_control_word =
1346                   dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347 
1348                dma_control_word |=
1349                   vertex_element->reg
1350                   << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351 
1352                dma_control_word |=
1353                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355 
1356                if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357                   if ((ddmadt_enables & (1 << stream)) != 0) {
1358                      assert(
1359                         ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360                            << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361                           ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362                          PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363                         (uint64_t)vertex_stream->buffer_size_in_bytes);
1364                      dma_control_word64 =
1365                         (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366                          (((uint64_t)vertex_stream->buffer_size_in_bytes
1367                            << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368                           ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369                   }
1370                }
1371                /* If this is the last dma then also set the last flag. */
1372                if (terminate) {
1373                   dma_control_word |=
1374                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375                }
1376 
1377                /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378                 * spec.
1379                 */
1380                pvr_pds_write_wide_constant(buffer,
1381                                            dma_control_constant64,
1382                                            dma_control_word64 |
1383                                               (uint64_t)dma_control_word);
1384             }
1385 
1386             if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387                if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388                   if ((ddmadt_enables & (1 << stream)) != 0) {
1389                      *buffer++ = pvr_pds_inst_encode_cmp(
1390                         0, /* cc enable */
1391                         PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392                         index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393                         (num_vertices_temp64 >> 1) +
1394                            PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395                                                                       (REGS64)
1396                                                                     */
1397                   }
1398                }
1399                /* Multiply by the vertex stream stride and add the base
1400                 * followed by a DOUTD.
1401                 *
1402                 * dmad32 (C0 * T0) + C1, C2
1403                 * src0 = stride  src1 = index  src2 = baseaddr src3 =
1404                 * doutd part
1405                 */
1406 
1407                uint32_t cc;
1408                if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409                   cc = 0;
1410                else
1411                   cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412 
1413                *buffer++ = pvr_pds_inst_encode_ddmad(
1414                   /* cc */ cc,
1415                   /* END */ 0,
1416                   /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417                   /* SRC1 */ temp, /* Index 32-bit*/
1418                   /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419                                                                   * Address
1420                                                                   * +
1421                                                                   * Offset
1422                                                                   */
1423                   /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424                                                                  * Transfer
1425                                                                  * Control
1426                                                                  * Word.
1427                                                                  */
1428                );
1429             }
1430 
1431             if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432                 ((ddmadt_enables & (1 << stream)) != 0)) {
1433                code_size += 1;
1434             }
1435             code_size += 1;
1436          } else {
1437             /* Repeat DMA.
1438              *
1439              * Write the DMA transfer control words into the PDS data
1440              * section.
1441              *
1442              * DMA address is 40-bit.
1443              */
1444 
1445             if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446                uint32_t dma_control_word;
1447 
1448                /* Write the address to the constant. */
1449                pvr_pds_write_dma_address(buffer,
1450                                          dma_address_constant64,
1451                                          vertex_stream->address +
1452                                             (uint64_t)vertex_element->offset,
1453                                          false,
1454                                          dev_info);
1455 
1456                /* Set up the DMA transfer control word. */
1457                dma_control_word =
1458                   vertex_element->size
1459                   << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460 
1461                dma_control_word |=
1462                   vertex_element->reg
1463                   << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464 
1465                switch (vertex_element->component_size) {
1466                case 4: {
1467                   dma_control_word |=
1468                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469                   break;
1470                }
1471                case 3: {
1472                   dma_control_word |=
1473                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474                   break;
1475                }
1476                case 2: {
1477                   dma_control_word |=
1478                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479                   break;
1480                }
1481                default: {
1482                   dma_control_word |=
1483                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484                   break;
1485                }
1486                }
1487 
1488                dma_control_word |=
1489                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490 
1491                dma_control_word |=
1492                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494 
1495                /* If this is the last dma then also set the last flag. */
1496                if (terminate) {
1497                   dma_control_word |=
1498                      PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499                }
1500 
1501                /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502                 * spec.
1503                 */
1504                pvr_pds_write_wide_constant(buffer,
1505                                            dma_control_constant64,
1506                                            (uint64_t)dma_control_word);
1507             }
1508 
1509             if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510                /* Multiply by the vertex stream stride and add the base
1511                 * followed by a DOUTD.
1512                 *
1513                 * dmad32 (C0 * T0) + C1, C2
1514                 * src0 = stride  src1 = index  src2 = baseaddr src3 =
1515                 * doutd part
1516                 */
1517                *buffer++ = pvr_pds_inst_encode_ddmad(
1518                   /* cc */ 0,
1519                   /* END */ 0,
1520                   /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521                   /* SRC1 */ temp, /* Index 32-bit*/
1522                   /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523                                                                   * Address
1524                                                                   * +
1525                                                                   * Offset.
1526                                                                   */
1527                   /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528                                                                  * Transfer
1529                                                                  * Control
1530                                                                  * Word.
1531                                                                  */
1532                );
1533             }
1534 
1535             code_size += 1;
1536          } /* End of repeat DMA. */
1537       } /* Element loop */
1538    } /* Stream loop */
1539 
1540    if (issue_empty_ddmad) {
1541       /* Issue an empty last DDMAD, always executed. */
1542       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543          pvr_pds_write_wide_constant(
1544             buffer,
1545             empty_dma_control_constant64,
1546             PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547       }
1548 
1549       code_size += 1;
1550 
1551       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552          *buffer++ = pvr_pds_inst_encode_ddmad(
1553             /* cc */ 0,
1554             /* END */ 0,
1555             /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556             /* SRC1 */ temp, /* Index 32-bit*/
1557             /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558                                                             *Address +
1559                                                             *Offset.
1560                                                             */
1561             /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562                                                                  * Transfer
1563                                                                  * Control
1564                                                                  * Word.
1565                                                                  */
1566          );
1567       }
1568    }
1569 
1570    if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571       if (current_p0) {
1572          code_size += 1;
1573 
1574          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575             /* Revert predicate back to IF0 which is required by DOUTU. */
1576             *buffer++ =
1577                pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578                                                                      */
1579                                   0, /* Neg */
1580                                   PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581                                                                     */
1582                                   1); /* Addr */
1583          }
1584       }
1585    }
1586    /* Send VertexID if requested. */
1587    if (program->iterate_vtx_id) {
1588       if (program->draw_indirect) {
1589          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590             *buffer++ = pvr_pds_inst_encode_add32(
1591                /* cc */ 0x0,
1592                /* ALUM */ 0, /* Unsigned */
1593                /* SNA */ 1, /* Minus */
1594                /* SRC0 32b */ input_register0, /* vertexID */
1595                /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596                                                                        * vertexID.
1597                                                                        */
1598                /* DST 32b */ input_register0);
1599          }
1600 
1601          code_size += 1;
1602       }
1603 
1604       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605          uint32_t doutw = pvr_pds_encode_doutw_src1(
1606             program->vtx_id_register,
1607             PVR_PDS_DOUTW_LOWER32,
1608             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609             false,
1610             dev_info);
1611 
1612          if (!program->iterate_instance_id && !program->iterate_remap_id)
1613             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614 
1615          pvr_pds_write_constant32(buffer,
1616                                   vertex_id_control_word_const32,
1617                                   doutw);
1618       } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619          *buffer++ = pvr_pds_encode_doutw64(
1620             /* cc */ 0,
1621             /* END */ 0,
1622             /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623                                                         */
1624             /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625       }
1626 
1627       code_size += 1;
1628    }
1629 
1630    /* Send InstanceID if requested. */
1631    if (program->iterate_instance_id) {
1632       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633          uint32_t doutw = pvr_pds_encode_doutw_src1(
1634             program->instance_id_register,
1635             PVR_PDS_DOUTW_UPPER32,
1636             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637             true,
1638             dev_info);
1639 
1640          if (!program->iterate_remap_id)
1641             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642 
1643          pvr_pds_write_constant32(buffer,
1644                                   instance_id_control_word_const32,
1645                                   doutw);
1646       } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647          *buffer++ = pvr_pds_encode_doutw64(
1648             /* cc */ 0,
1649             /* END */ 0,
1650             /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651             /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652       }
1653 
1654       code_size += 1;
1655    }
1656 
1657    /* Send remapped index number to vi0. */
1658    if (program->iterate_remap_id) {
1659       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660          uint32_t doutw = pvr_pds_encode_doutw_src1(
1661             0 /* vi0 */,
1662             PVR_PDS_DOUTW_LOWER32,
1663             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665             false,
1666             dev_info);
1667 
1668          pvr_pds_write_constant64(buffer,
1669                                   geometry_id_control_word_const64,
1670                                   doutw,
1671                                   0);
1672       } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673          *buffer++ = pvr_pds_encode_doutw64(
1674             /* cc */ 0,
1675             /* END */ 0,
1676             /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677                                                           * Src1
1678                                                           */
1679             /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680       }
1681 
1682       code_size += 1;
1683    }
1684 
1685    /* Copy the USC task control words to constants. */
1686    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687       pvr_pds_write_wide_constant(buffer,
1688                                   usc_control_constant64,
1689                                   program->usc_task_control.src0); /* 64-bit
1690                                                                     * Src0
1691                                                                     */
1692       if (program->stream_patch_offsets) {
1693          /* USC TaskControl is always the first patch. */
1694          program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695       }
1696    }
1697 
1698    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699       /* Conditionally (if last in task) issue the task to the USC
1700        * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701        */
1702 
1703       *buffer++ = pvr_pds_encode_doutu(
1704          /* cc */ 1,
1705          /* END */ 1,
1706          /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707 
1708       /* End the program if the Dout did not already end it. */
1709       *buffer++ = pvr_pds_inst_encode_halt(0);
1710    }
1711 
1712    code_size += 2;
1713 
1714    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715       /* Set the data segment pointer and ensure we return 1 past the buffer
1716        * ptr.
1717        */
1718       program->data_segment = buffer;
1719 
1720       buffer += consts_size;
1721    }
1722 
1723    program->temps_used = temps_used;
1724    program->data_size = consts_size;
1725    program->code_size = code_size;
1726    program->ddmadt_enables = ddmadt_enables;
1727    if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728       program->skip_stream_flag = skip_stream_flag;
1729 
1730    return buffer;
1731 }
1732 
1733 /**
1734  * Generates a PDS program to load USC compute shader global/local/workgroup
1735  * sizes/ids and then a DOUTU to execute the USC.
1736  *
1737  * \param program Pointer to description of the program that should be
1738  *                generated.
1739  * \param buffer Pointer to buffer that receives the output of this function.
1740  *               This will be either the data segment or the code segment,
1741  *               depending on gen_mode.
1742  * \param gen_mode Which part to generate, either data segment or code segment.
1743  *                 If PDS_GENERATE_SIZES is specified, nothing is written, but
1744  *                 size information in program is updated.
1745  * \param dev_info PVR device info struct.
1746  * \returns Pointer to just beyond the buffer for the data - i.e. the value of
1747  *          the buffer after writing its contents.
1748  */
1749 uint32_t *
1750 pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751                        uint32_t *restrict buffer,
1752                        enum pvr_pds_generate_mode gen_mode,
1753                        const struct pvr_device_info *dev_info)
1754 {
1755    uint32_t usc_control_constant64;
1756    uint32_t usc_control_constant64_coeff_update = 0;
1757    uint32_t zero_constant64 = 0;
1758 
1759    uint32_t data_size = 0;
1760    uint32_t code_size = 0;
1761    uint32_t temps_used = 0;
1762    uint32_t doutw = 0;
1763 
1764    uint32_t barrier_ctrl_word = 0;
1765    uint32_t barrier_ctrl_word2 = 0;
1766 
1767    /* Even though there are 3 IDs for local and global we only need max one
1768     * DOUTW for local, and two for global.
1769     */
1770    uint32_t work_group_id_ctrl_words[2] = { 0 };
1771    uint32_t local_id_ctrl_word = 0;
1772    uint32_t local_input_register;
1773 
1774    /* For the constant value to load into ptemp (SW fence). */
1775    uint64_t predicate_ld_src0_constant = 0;
1776    uint32_t cond_render_negate_constant = 0;
1777 
1778    uint32_t cond_render_pred_temp;
1779    uint32_t cond_render_negate_temp;
1780 
1781    /* 2x 64 bit registers that will mask out the Predicate load. */
1782    uint32_t cond_render_pred_mask_constant = 0;
1783 
1784 #if MESA_DEBUG
1785    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786       for (uint32_t j = 0; j < program->data_size; j++)
1787          buffer[j] = 0xDEADBEEF;
1788    }
1789 #endif
1790 
1791    /* All the compute input registers are in temps. */
1792    temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793 
1794    uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795 
1796    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797 
1798    if (program->kick_usc) {
1799       /* Copy the USC task control words to constants. */
1800       usc_control_constant64 =
1801          pvr_pds_get_constants(&next_constant, 2, &data_size);
1802    }
1803 
1804    if (program->has_coefficient_update_task) {
1805       usc_control_constant64_coeff_update =
1806          pvr_pds_get_constants(&next_constant, 2, &data_size);
1807    }
1808 
1809    if (program->conditional_render) {
1810       predicate_ld_src0_constant =
1811          pvr_pds_get_constants(&next_constant, 2, &data_size);
1812       cond_render_negate_constant =
1813          pvr_pds_get_constants(&next_constant, 2, &data_size);
1814       cond_render_pred_mask_constant =
1815          pvr_pds_get_constants(&next_constant, 4, &data_size);
1816 
1817       /* LD will load a 64 bit value. */
1818       cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819       cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820 
1821       program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822       program->cond_render_pred_temp = cond_render_pred_temp;
1823    }
1824 
1825    if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1826        (program->clear_pds_barrier) ||
1827        (program->kick_usc && program->conditional_render)) {
1828       zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829    }
1830 
1831    if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1832       barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833       if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834          barrier_ctrl_word2 =
1835             pvr_pds_get_constants(&next_constant, 1, &data_size);
1836       }
1837    }
1838 
1839    if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
1840        program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1841       work_group_id_ctrl_words[0] =
1842          pvr_pds_get_constants(&next_constant, 1, &data_size);
1843    }
1844 
1845    if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1846       work_group_id_ctrl_words[1] =
1847          pvr_pds_get_constants(&next_constant, 1, &data_size);
1848    }
1849 
1850    if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1851        (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1852        (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1853       local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854    }
1855 
1856    if (program->add_base_workgroup) {
1857       for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858            workgroup_component++) {
1859          if (program->work_group_input_regs[workgroup_component] !=
1860              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1861             program
1862                ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863                pvr_pds_get_constants(&next_constant, 1, &data_size);
1864          }
1865       }
1866    }
1867 
1868    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869       if (program->kick_usc) {
1870          /* Src0 for DOUTU */
1871          pvr_pds_write_wide_constant(buffer,
1872                                      usc_control_constant64,
1873                                      program->usc_task_control.src0); /* 64-bit
1874                                                                        * Src0.
1875                                                                        */
1876       }
1877 
1878       if (program->has_coefficient_update_task) {
1879          /* Src0 for DOUTU. */
1880          pvr_pds_write_wide_constant(
1881             buffer,
1882             usc_control_constant64_coeff_update,
1883             program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884       }
1885 
1886       if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1887           (program->clear_pds_barrier) ||
1888           (program->kick_usc && program->conditional_render)) {
1889          pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890                                                                    * Src0
1891                                                                    */
1892       }
1893 
1894       if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1895          if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896             /* Write the constant for the coefficient register write. */
1897             doutw = pvr_pds_encode_doutw_src1(
1898                program->barrier_coefficient + 4,
1899                PVR_PDS_DOUTW_LOWER64,
1900                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901                true,
1902                dev_info);
1903             pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904          }
1905          /* Write the constant for the coefficient register write. */
1906          doutw = pvr_pds_encode_doutw_src1(
1907             program->barrier_coefficient,
1908             PVR_PDS_DOUTW_LOWER64,
1909             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910             true,
1911             dev_info);
1912 
1913          /* Check whether the barrier is going to be the last DOUTW done by
1914           * the coefficient sync task.
1915           */
1916          if ((program->work_group_input_regs[0] ==
1917               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1918              (program->work_group_input_regs[1] ==
1919               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1920              (program->work_group_input_regs[2] ==
1921               PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1922             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1923          }
1924 
1925          pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1926       }
1927 
1928       /* If we want work-group id X, see if we also want work-group id Y. */
1929       if (program->work_group_input_regs[0] !=
1930              PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
1931           program->work_group_input_regs[1] !=
1932              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1933          /* Make sure we are going to DOUTW them into adjacent registers
1934           * otherwise we can't do it in one.
1935           */
1936          assert(program->work_group_input_regs[1] ==
1937                 (program->work_group_input_regs[0] + 1));
1938 
1939          doutw = pvr_pds_encode_doutw_src1(
1940             program->work_group_input_regs[0],
1941             PVR_PDS_DOUTW_LOWER64,
1942             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1943             true,
1944             dev_info);
1945 
1946          /* If we don't want the Z work-group id then this is the last one.
1947           */
1948          if (program->work_group_input_regs[2] ==
1949              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1950             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1951          }
1952 
1953          pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1954       }
1955       /* If we only want one of X or Y then handle them separately. */
1956       else {
1957          if (program->work_group_input_regs[0] !=
1958              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1959             doutw = pvr_pds_encode_doutw_src1(
1960                program->work_group_input_regs[0],
1961                PVR_PDS_DOUTW_LOWER32,
1962                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1963                true,
1964                dev_info);
1965 
1966             /* If we don't want the Z work-group id then this is the last
1967              * one.
1968              */
1969             if (program->work_group_input_regs[2] ==
1970                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1971                doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1972             }
1973 
1974             pvr_pds_write_constant32(buffer,
1975                                      work_group_id_ctrl_words[0],
1976                                      doutw);
1977          } else if (program->work_group_input_regs[1] !=
1978                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1979             doutw = pvr_pds_encode_doutw_src1(
1980                program->work_group_input_regs[1],
1981                PVR_PDS_DOUTW_UPPER32,
1982                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1983                true,
1984                dev_info);
1985 
1986             /* If we don't want the Z work-group id then this is the last
1987              * one.
1988              */
1989             if (program->work_group_input_regs[2] ==
1990                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1991                doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1992             }
1993 
1994             pvr_pds_write_constant32(buffer,
1995                                      work_group_id_ctrl_words[0],
1996                                      doutw);
1997          }
1998       }
1999 
2000       /* Handle work-group id Z. */
2001       if (program->work_group_input_regs[2] !=
2002           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2003          doutw = pvr_pds_encode_doutw_src1(
2004             program->work_group_input_regs[2],
2005             PVR_PDS_DOUTW_UPPER32,
2006             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
2007                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2008             true,
2009             dev_info);
2010 
2011          pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
2012       }
2013 
2014       /* Handle the local IDs. */
2015       if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2016           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2017          uint32_t dest_reg;
2018 
2019          /* If we want local id Y and Z make sure the compiler wants them in
2020           * the same register.
2021           */
2022          if (!program->flattened_work_groups) {
2023             if ((program->local_input_regs[1] !=
2024                  PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2025                 (program->local_input_regs[2] !=
2026                  PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2027                assert(program->local_input_regs[1] ==
2028                       program->local_input_regs[2]);
2029             }
2030          }
2031 
2032          if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2033             dest_reg = program->local_input_regs[1];
2034          else
2035             dest_reg = program->local_input_regs[2];
2036 
2037          /* If we want local id X and (Y or Z) then we can do that in a
2038           * single 64-bit DOUTW.
2039           */
2040          if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2041             assert(dest_reg == (program->local_input_regs[0] + 1));
2042 
2043             doutw = pvr_pds_encode_doutw_src1(
2044                program->local_input_regs[0],
2045                PVR_PDS_DOUTW_LOWER64,
2046                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2047                true,
2048                dev_info);
2049 
2050             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2051 
2052             pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2053          }
2054          /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2055           */
2056          else {
2057             doutw = pvr_pds_encode_doutw_src1(
2058                dest_reg,
2059                PVR_PDS_DOUTW_UPPER32,
2060                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2061                true,
2062                dev_info);
2063 
2064             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2065 
2066             pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2067          }
2068       }
2069       /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2070        */
2071       else if (program->local_input_regs[0] !=
2072                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2073          doutw = pvr_pds_encode_doutw_src1(
2074             program->local_input_regs[0],
2075             PVR_PDS_DOUTW_LOWER32,
2076             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2077                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2078             true,
2079             dev_info);
2080 
2081          pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2082       }
2083    }
2084 
2085    if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2086        gen_mode == PDS_GENERATE_SIZES) {
2087       const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2088 #define APPEND(X)                    \
2089    if (encode) {                     \
2090       *buffer = X;                   \
2091       buffer++;                      \
2092    } else {                          \
2093       code_size += sizeof(uint32_t); \
2094    }
2095 
2096       /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2097        * then we will be doing an infinite loop.
2098        */
2099       if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2100          assert(program->coeff_update_task_branch_size > 0);
2101 
2102       /* Test whether this is the coefficient update task or not. */
2103       APPEND(
2104          pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2105                             PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2106                             PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2107                             program->coeff_update_task_branch_size /* ADDR */));
2108 
2109       /* Do we need to initialize the barrier coefficient? */
2110       if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2111          if (PVR_HAS_QUIRK(dev_info, 51210)) {
2112             /* Initialize the second barrier coefficient registers to zero.
2113              */
2114             APPEND(pvr_pds_encode_doutw64(0, /* cc */
2115                                           0, /* END */
2116                                           barrier_ctrl_word2, /* SRC1 */
2117                                           zero_constant64 >> 1)); /* SRC0 */
2118          }
2119          /* Initialize the coefficient register to zero. */
2120          APPEND(pvr_pds_encode_doutw64(0, /* cc */
2121                                        0, /* END */
2122                                        barrier_ctrl_word, /* SRC1 */
2123                                        zero_constant64 >> 1)); /* SRC0 */
2124       }
2125 
2126       if (program->add_base_workgroup) {
2127          const uint32_t temp_values[3] = { 0, 1, 3 };
2128          for (uint32_t workgroup_component = 0; workgroup_component < 3;
2129               workgroup_component++) {
2130             if (program->work_group_input_regs[workgroup_component] ==
2131                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2132                continue;
2133 
2134             APPEND(pvr_pds_inst_encode_add32(
2135                /* cc */ 0x0,
2136                /* ALUM */ 0,
2137                /* SNA */ 0,
2138                /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2139                   program->base_workgroup_constant_offset_in_dwords
2140                      [workgroup_component],
2141                /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2142                   PVR_PDS_CDM_WORK_GROUP_ID_X +
2143                   temp_values[workgroup_component],
2144                /* DST  (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2145                   PVR_PDS_CDM_WORK_GROUP_ID_X +
2146                   temp_values[workgroup_component]));
2147          }
2148       }
2149 
2150       /* If we are going to put the work-group IDs in coefficients then we
2151        * just need to do the DOUTWs.
2152        */
2153       if ((program->work_group_input_regs[0] !=
2154            PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2155           (program->work_group_input_regs[1] !=
2156            PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2157          uint32_t dest_reg;
2158 
2159          if (program->work_group_input_regs[0] !=
2160              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2161             dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2162          } else {
2163             dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2164          }
2165 
2166          APPEND(pvr_pds_encode_doutw64(0, /* cc */
2167                                        0, /* END */
2168                                        work_group_id_ctrl_words[0], /* SRC1
2169                                                                      */
2170                                        dest_reg >> 1)); /* SRC0 */
2171       }
2172 
2173       if (program->work_group_input_regs[2] !=
2174           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2175          APPEND(pvr_pds_encode_doutw64(
2176             0, /* cc */
2177             0, /* END */
2178             work_group_id_ctrl_words[1], /* SRC1 */
2179             (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2180                1)); /* SRC0 */
2181       }
2182 
2183       /* Issue the task to the USC. */
2184       if (program->kick_usc && program->has_coefficient_update_task) {
2185          APPEND(pvr_pds_encode_doutu(0, /* cc */
2186                                      1, /* END */
2187                                      usc_control_constant64_coeff_update >>
2188                                         1)); /* SRC0; DOUTU 64-bit Src0 */
2189       }
2190 
2191       /* Encode a HALT */
2192       APPEND(pvr_pds_inst_encode_halt(0));
2193 
2194       /* Set the branch size used to skip the coefficient sync task. */
2195       program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2196 
2197       /* DOUTW in the local IDs. */
2198 
2199       /* If we want X and Y or Z, we only need one DOUTW. */
2200       if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2201           ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2202            (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
2203          local_input_register =
2204             PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2205       } else {
2206          /* If we just want X. */
2207          if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2208             local_input_register =
2209                PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2210          }
2211          /* If we just want Y or Z. */
2212          else if (program->local_input_regs[1] !=
2213                      PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
2214                   program->local_input_regs[2] !=
2215                      PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2216             local_input_register =
2217                PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2218          }
2219       }
2220 
2221       if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2222           (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2223           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2224          APPEND(pvr_pds_encode_doutw64(0, /* cc */
2225                                        0, /* END */
2226                                        local_id_ctrl_word, /* SRC1 */
2227                                        local_input_register >> 1)); /* SRC0
2228                                                                      */
2229       }
2230 
2231       if (program->clear_pds_barrier) {
2232          /* Zero the persistent temp (SW fence for context switch). */
2233          APPEND(pvr_pds_inst_encode_add64(
2234             0, /* cc */
2235             PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2236             PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2237             PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2238                (zero_constant64 >> 1), /* src0 = 0 */
2239             PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2240                (zero_constant64 >> 1), /* src1 = 0 */
2241             PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2242                                                              * ptemp64[0]
2243                                                              */
2244       }
2245 
2246       /* If this is a fence, issue the DOUTC. */
2247       if (program->fence) {
2248          APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2249                                           0 /* END */));
2250       }
2251 
2252       if (program->kick_usc) {
2253          if (program->conditional_render) {
2254             /* Skip if coefficient update task. */
2255             APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2256                                            0,
2257                                            PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2258                                            16));
2259 
2260             /* Load the predicate. */
2261             APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2262 
2263             /* Load negate constant into temp for CMP. */
2264             APPEND(pvr_pds_inst_encode_add64(
2265                0, /* cc */
2266                PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2267                PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2268                PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2269                   (cond_render_negate_constant >> 1), /* src0 = 0 */
2270                PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2271                   (zero_constant64 >> 1), /* src1 = 0 */
2272                PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2273                   (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2274                                                      */
2275 
2276             APPEND(pvr_pds_inst_encode_wdf(0));
2277 
2278             for (uint32_t i = 0; i < 4; i++) {
2279                APPEND(pvr_pds_inst_encode_sftlp32(
2280                   1, /* enable immediate */
2281                   0, /* cc */
2282                   PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2283                   cond_render_pred_temp + i, /* SRC0 */
2284                   cond_render_pred_mask_constant + i, /* SRC1 */
2285                   0, /* SRC2 (Shift) */
2286                   cond_render_pred_temp + i)); /* DST */
2287 
2288                APPEND(
2289                   pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2290                                               0, /* cc */
2291                                               PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2292                                                                          */
2293                                               cond_render_pred_temp + i, /* SRC0
2294                                                                           */
2295                                               cond_render_pred_temp, /* SRC1 */
2296                                               0, /* SRC2 (Shift) */
2297                                               cond_render_pred_temp)); /* DST */
2298             }
2299 
2300             APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2301                                             cond_render_pred_temp + 1, /* SRC1
2302                                                                         */
2303                                             0, /* SRC0 */
2304                                             0)); /* GLOBALREG */
2305 
2306             APPEND(pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2307                                                0, /* cc */
2308                                                PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2309                                                                            */
2310                                                cond_render_pred_temp, /* SRC0 */
2311                                                cond_render_negate_temp, /* SRC1
2312                                                                          */
2313                                                0, /* SRC2 (Shift) */
2314                                                cond_render_pred_temp)); /* DST
2315                                                                          */
2316 
2317             /* Check that the predicate is 0. */
2318             APPEND(pvr_pds_inst_encode_cmpi(
2319                0, /* cc */
2320                PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2321                (cond_render_pred_temp >> 1) +
2322                   PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2323                0)); /* SRC1 */
2324 
2325             /* If predicate is 0, skip DOUTU. */
2326             APPEND(pvr_pds_inst_encode_bra(
2327                PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2328                                                   P0 */
2329                0, /* NEG */
2330                PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2331                                                     keep
2332                                                   */
2333                2));
2334          }
2335 
2336          /* Issue the task to the USC.
2337           * DoutU src1=USC Code Base address, src2=doutu word 2.
2338           */
2339          APPEND(pvr_pds_encode_doutu(1, /* cc */
2340                                      1, /* END */
2341                                      usc_control_constant64 >> 1)); /* SRC0;
2342                                                                      * DOUTU
2343                                                                      * 64-bit
2344                                                                      * Src0.
2345                                                                      */
2346       }
2347 
2348       /* End the program if the Dout did not already end it. */
2349       APPEND(pvr_pds_inst_encode_halt(0));
2350 #undef APPEND
2351    }
2352 
2353    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2354       /* Set the data segment pointer and ensure we return 1 past the buffer
2355        * ptr.
2356        */
2357       program->data_segment = buffer;
2358 
2359       buffer += next_constant;
2360    }
2361 
2362    /* Require at least one DWORD of PDS data so the program runs. */
2363    data_size = MAX2(1, data_size);
2364 
2365    program->temps_used = temps_used;
2366    program->highest_temp = temps_used;
2367    program->data_size = data_size;
2368    if (gen_mode == PDS_GENERATE_SIZES)
2369       program->code_size = code_size;
2370 
2371    return buffer;
2372 }
2373 
2374 /**
2375  * Generates the PDS vertex shader data or code block. This program will do a
2376  * DMA into USC Constants followed by a DOUTU.
2377  *
2378  * \param program Pointer to the PDS vertex shader program.
2379  * \param buffer Pointer to the buffer for the program.
2380  * \param gen_mode Generate code or data.
2381  * \param dev_info PVR device information struct.
2382  * \returns Pointer to just beyond the code/data.
2383  */
pvr_pds_vertex_shader_sa(struct pvr_pds_vertex_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)2384 uint32_t *pvr_pds_vertex_shader_sa(
2385    struct pvr_pds_vertex_shader_sa_program *restrict program,
2386    uint32_t *restrict buffer,
2387    enum pvr_pds_generate_mode gen_mode,
2388    const struct pvr_device_info *dev_info)
2389 {
2390    uint32_t next_constant;
2391    uint32_t data_size = 0;
2392    uint32_t code_size = 0;
2393 
2394    uint32_t usc_control_constant64 = 0;
2395    uint32_t dma_address_constant64 = 0;
2396    uint32_t dma_control_constant32 = 0;
2397    uint32_t doutw_value_constant64 = 0;
2398    uint32_t doutw_control_constant32 = 0;
2399    uint32_t fence_constant_word = 0;
2400    uint32_t *buffer_base;
2401    uint32_t kick_index;
2402 
2403    uint32_t total_num_doutw =
2404       program->num_dword_doutw + program->num_q_word_doutw;
2405    uint32_t total_size_dma =
2406       program->num_dword_doutw + 2 * program->num_q_word_doutw;
2407 
2408    next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2409 
2410    /* Copy the DMA control words and USC task control words to constants.
2411     *
2412     * Arrange them so that the 64-bit words are together followed by the 32-bit
2413     * words.
2414     */
2415    if (program->kick_usc) {
2416       usc_control_constant64 =
2417          pvr_pds_get_constants(&next_constant, 2, &data_size);
2418    }
2419 
2420    if (program->clear_pds_barrier) {
2421       fence_constant_word =
2422          pvr_pds_get_constants(&next_constant, 2, &data_size);
2423    }
2424    dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2425                                                   2 * program->num_dma_kicks,
2426                                                   &data_size);
2427 
2428    /* Assign all unaligned constants together to avoid alignment issues caused
2429     * by pvr_pds_get_constants with even allocation sizes.
2430     */
2431    doutw_value_constant64 = pvr_pds_get_constants(
2432       &next_constant,
2433       total_size_dma + total_num_doutw + program->num_dma_kicks,
2434       &data_size);
2435    doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2436    dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2437 
2438    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2439       buffer_base = buffer;
2440 
2441       if (program->kick_usc) {
2442          /* Src0 for DOUTU. */
2443          pvr_pds_write_wide_constant(buffer_base,
2444                                      usc_control_constant64,
2445                                      program->usc_task_control.src0); /* DOUTU
2446                                                                        * 64-bit
2447                                                                        * Src0.
2448                                                                        */
2449          buffer += 2;
2450       }
2451 
2452       if (program->clear_pds_barrier) {
2453          /* Encode the fence constant src0. Fence barrier is initialized to
2454           * zero.
2455           */
2456          pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2457          buffer += 2;
2458       }
2459 
2460       if (total_num_doutw > 0) {
2461          for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2462             /* Write the constant for the coefficient register write. */
2463             pvr_pds_write_constant64(buffer_base,
2464                                      doutw_value_constant64,
2465                                      program->q_word_doutw_value[2 * i],
2466                                      program->q_word_doutw_value[2 * i + 1]);
2467             pvr_pds_write_constant32(
2468                buffer_base,
2469                doutw_control_constant32,
2470                program->q_word_doutw_control[i] |
2471                   ((!program->num_dma_kicks && i == total_num_doutw - 1)
2472                       ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2473                       : 0));
2474 
2475             doutw_value_constant64 += 2;
2476             doutw_control_constant32 += 1;
2477          }
2478 
2479          for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2480             /* Write the constant for the coefficient register write. */
2481             pvr_pds_write_constant32(buffer_base,
2482                                      doutw_value_constant64,
2483                                      program->dword_doutw_value[i]);
2484             pvr_pds_write_constant32(
2485                buffer_base,
2486                doutw_control_constant32,
2487                program->dword_doutw_control[i] |
2488                   ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2489                       ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2490                       : 0));
2491 
2492             doutw_value_constant64 += 1;
2493             doutw_control_constant32 += 1;
2494          }
2495 
2496          buffer += total_size_dma + total_num_doutw;
2497       }
2498 
2499       if (program->num_dma_kicks == 1) /* Most-common case. */
2500       {
2501          /* Src0 for DOUTD - Address. */
2502          pvr_pds_write_dma_address(buffer_base,
2503                                    dma_address_constant64,
2504                                    program->dma_address[0],
2505                                    false,
2506                                    dev_info);
2507 
2508          /* Src1 for DOUTD - Control Word. */
2509          pvr_pds_write_constant32(
2510             buffer_base,
2511             dma_control_constant32,
2512             program->dma_control[0] |
2513                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2514 
2515          /* Move the buffer ptr along as we will return 1 past the buffer. */
2516          buffer += 3;
2517       } else if (program->num_dma_kicks > 1) {
2518          for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2519               kick_index++) {
2520             /* Src0 for DOUTD - Address. */
2521             pvr_pds_write_dma_address(buffer_base,
2522                                       dma_address_constant64,
2523                                       program->dma_address[kick_index],
2524                                       false,
2525                                       dev_info);
2526 
2527             /* Src1 for DOUTD - Control Word. */
2528             pvr_pds_write_constant32(buffer_base,
2529                                      dma_control_constant32,
2530                                      program->dma_control[kick_index]);
2531             dma_address_constant64 += 2;
2532             dma_control_constant32 += 1;
2533          }
2534 
2535          /* Src0 for DOUTD - Address. */
2536          pvr_pds_write_dma_address(buffer_base,
2537                                    dma_address_constant64,
2538                                    program->dma_address[kick_index],
2539                                    false,
2540                                    dev_info);
2541 
2542          /* Src1 for DOUTD - Control Word. */
2543          pvr_pds_write_constant32(
2544             buffer_base,
2545             dma_control_constant32,
2546             program->dma_control[kick_index] |
2547                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2548 
2549          buffer += 3 * program->num_dma_kicks;
2550       }
2551    } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2552       if (program->clear_pds_barrier) {
2553          /* Zero the persistent temp (SW fence for context switch). */
2554          *buffer++ = pvr_pds_inst_encode_add64(
2555             0, /* cc */
2556             PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2557             PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2558             PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2559                (fence_constant_word >> 1), /* src0 = 0 */
2560             PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2561                (fence_constant_word >> 1), /* src1 = 0 */
2562             PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2563                                                             * ptemp[0]
2564                                                             */
2565       }
2566 
2567       if (total_num_doutw > 0) {
2568          for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2569             /* Set the coefficient register to data value. */
2570             *buffer++ = pvr_pds_encode_doutw64(
2571                /* cc */ 0,
2572                /* END */ !program->num_dma_kicks && !program->kick_usc &&
2573                   (i == total_num_doutw - 1),
2574                /* SRC1 */ doutw_control_constant32,
2575                /* SRC0 */ doutw_value_constant64 >> 1);
2576 
2577             doutw_value_constant64 += 2;
2578             doutw_control_constant32 += 1;
2579          }
2580 
2581          for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2582             /* Set the coefficient register to data value. */
2583             *buffer++ = pvr_pds_encode_doutw64(
2584                /* cc */ 0,
2585                /* END */ !program->num_dma_kicks && !program->kick_usc &&
2586                   (i == program->num_dword_doutw - 1),
2587                /* SRC1 */ doutw_control_constant32,
2588                /* SRC0 */ doutw_value_constant64 >> 1);
2589 
2590             doutw_value_constant64 += 1;
2591             doutw_control_constant32 += 1;
2592          }
2593       }
2594 
2595       if (program->num_dma_kicks != 0) {
2596          /* DMA the state into the secondary attributes. */
2597 
2598          if (program->num_dma_kicks == 1) /* Most-common case. */
2599          {
2600             *buffer++ = pvr_pds_encode_doutd(
2601                /* cc */ 0,
2602                /* END */ !program->kick_usc,
2603                /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2604                /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2605                                                          * Src0.
2606                                                          */
2607          } else {
2608             for (kick_index = 0; kick_index < program->num_dma_kicks;
2609                  kick_index++) {
2610                *buffer++ = pvr_pds_encode_doutd(
2611                   /* cc */ 0,
2612                   /* END */ (!program->kick_usc) &&
2613                      (kick_index + 1 == program->num_dma_kicks),
2614                   /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2615                                                       * Src1.
2616                                                       */
2617                   /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2618                                                             * 64-bit
2619                                                             * Src0.
2620                                                             */
2621                dma_address_constant64 += 2;
2622                dma_control_constant32 += 1;
2623             }
2624          }
2625       }
2626 
2627       if (program->kick_usc) {
2628          /* Kick the USC. */
2629          *buffer++ = pvr_pds_encode_doutu(
2630             /* cc */ 0,
2631             /* END */ 1,
2632             /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2633                                                       */
2634       }
2635 
2636       if (!program->kick_usc && program->num_dma_kicks == 0 &&
2637           total_num_doutw == 0) {
2638          *buffer++ = pvr_pds_inst_encode_halt(0);
2639       }
2640    }
2641 
2642    code_size = program->num_dma_kicks + total_num_doutw;
2643    if (program->clear_pds_barrier)
2644       code_size++; /* ADD64 instruction. */
2645 
2646    if (program->kick_usc)
2647       code_size++;
2648 
2649    /* If there are no DMAs and no USC kick then code is HALT only. */
2650    if (code_size == 0)
2651       code_size = 1;
2652 
2653    program->data_size = data_size;
2654    program->code_size = code_size;
2655 
2656    return buffer;
2657 }
2658 
2659 /**
2660  * Writes the Uniform Data block for the PDS pixel shader secondary attributes
2661  * program.
2662  *
2663  * \param program Pointer to the PDS pixel shader secondary attributes program.
2664  * \param buffer Pointer to the buffer for the code/data.
2665  * \param gen_mode Either code or data can be generated or sizes only updated.
2666  * \returns Pointer to just beyond the buffer for the program/data.
2667  */
pvr_pds_pixel_shader_uniform_texture_code(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)2668 uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2669    struct pvr_pds_pixel_shader_sa_program *restrict program,
2670    uint32_t *restrict buffer,
2671    enum pvr_pds_generate_mode gen_mode)
2672 {
2673    uint32_t *instruction;
2674    uint32_t code_size = 0;
2675    uint32_t data_size = 0;
2676    uint32_t temps_used = 0;
2677    uint32_t next_constant;
2678 
2679    assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2680           0);
2681 
2682    assert((gen_mode == PDS_GENERATE_CODE_SEGMENT && buffer) ||
2683           gen_mode == PDS_GENERATE_SIZES);
2684 
2685    /* clang-format off */
2686    /* Shape of code segment (note: clear is different)
2687     *
2688     *      Code
2689     *    +------------+
2690     *    | BRA if0    |
2691     *    | DOUTD      |
2692     *    |  ...       |
2693     *    | DOUTD.halt |
2694     *    | uniform    |
2695     *    | DOUTD      |
2696     *    |  ...       |
2697     *    |  ...       |
2698     *    | DOUTW      |
2699     *    |  ...       |
2700     *    |  ...       |
2701     *    | DOUTU.halt |
2702     *    | HALT       |
2703     *    +------------+
2704     */
2705    /* clang-format on */
2706    instruction = buffer;
2707 
2708    next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2709 
2710    /* The clear color can arrive packed in the right form in the first (or
2711     * first 2) dwords of the shared registers and the program will issue a
2712     * single doutw for this.
2713     */
2714    if (program->clear && program->packed_clear) {
2715       uint32_t color_constant1 =
2716          pvr_pds_get_constants(&next_constant, 2, &data_size);
2717 
2718       uint32_t control_word_constant1 =
2719          pvr_pds_get_constants(&next_constant, 2, &data_size);
2720 
2721       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2722          /* DOUTW the clear color to the USC constants. Predicate with
2723           * uniform loading flag (IF0).
2724           */
2725          *instruction++ = pvr_pds_encode_doutw64(
2726             /* cc */ 1, /* Only for uniform loading program. */
2727             /* END */ program->kick_usc ? 0 : 1, /* Last
2728                                                   * instruction
2729                                                   * for a clear.
2730                                                   */
2731             /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2732             /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2733 
2734          code_size += 1;
2735       }
2736    } else if (program->clear) {
2737       uint32_t color_constant1, color_constant2;
2738 
2739       if (program->clear_color_dest_reg & 0x1) {
2740          uint32_t color_constant3, control_word_constant1,
2741             control_word_constant2, color_constant4;
2742 
2743          color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2744          color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2745          color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2746 
2747          control_word_constant1 =
2748             pvr_pds_get_constants(&next_constant, 2, &data_size);
2749          control_word_constant2 =
2750             pvr_pds_get_constants(&next_constant, 2, &data_size);
2751          color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2752 
2753          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2754             /* DOUTW the clear color to the USSE constants. Predicate with
2755              * uniform loading flag (IF0).
2756              */
2757             *instruction++ = pvr_pds_encode_doutw64(
2758                /* cc */ 1, /* Only for Uniform Loading program */
2759                /* END */ 0,
2760                /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2761                /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2762 
2763             *instruction++ = pvr_pds_encode_doutw64(
2764                /* cc */ 1, /* Only for Uniform Loading program */
2765                /* END */ 0,
2766                /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2767                /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2768 
2769             *instruction++ = pvr_pds_encode_doutw64(
2770                /* cc */ 1, /* Only for uniform loading program */
2771                /* END */ program->kick_usc ? 0 : 1, /* Last
2772                                                      * instruction
2773                                                      * for a clear.
2774                                                      */
2775                /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2776                /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2777          }
2778 
2779          code_size += 3;
2780       } else {
2781          uint32_t control_word_constant, control_word_last_constant;
2782 
2783          /* Put the clear color and control words into the first 8
2784           * constants.
2785           */
2786          color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2787          color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2788          control_word_constant =
2789             pvr_pds_get_constants(&next_constant, 2, &data_size);
2790          control_word_last_constant =
2791             pvr_pds_get_constants(&next_constant, 2, &data_size);
2792 
2793          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2794             /* DOUTW the clear color to the USSE constants. Predicate with
2795              * uniform loading flag (IF0).
2796              */
2797             *instruction++ = pvr_pds_encode_doutw64(
2798                /* cc */ 1, /* Only for Uniform Loading program */
2799                /* END */ 0,
2800                /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2801                /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2802 
2803             *instruction++ = pvr_pds_encode_doutw64(
2804                /* cc */ 1, /* Only for uniform loading program */
2805                /* END */ program->kick_usc ? 0 : 1, /* Last
2806                                                      * instruction
2807                                                      * for a clear.
2808                                                      */
2809                /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2810                                                        */
2811                /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2812          }
2813 
2814          code_size += 2;
2815       }
2816 
2817       if (program->kick_usc) {
2818          uint32_t doutu_constant64;
2819 
2820          doutu_constant64 =
2821             pvr_pds_get_constants(&next_constant, 2, &data_size);
2822 
2823          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2824             /* Issue the task to the USC.
2825              *
2826              * dout ds1[constant_use], ds0[constant_use],
2827              * ds1[constant_use], emit
2828              */
2829             *instruction++ = pvr_pds_encode_doutu(
2830                /* cc */ 0,
2831                /* END */ 1,
2832                /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2833                                                    */
2834          }
2835 
2836          code_size += 1;
2837       }
2838 
2839       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2840          /* End the program. */
2841          *instruction++ = pvr_pds_inst_encode_halt(0);
2842       }
2843       code_size += 1;
2844    } else {
2845       uint32_t total_num_doutw =
2846          program->num_dword_doutw + program->num_q_word_doutw;
2847       bool both_textures_and_uniforms =
2848          ((program->num_texture_dma_kicks > 0) &&
2849           ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2850            program->kick_usc));
2851       uint32_t doutu_constant64 = 0;
2852 
2853       if (both_textures_and_uniforms) {
2854          /* If the size of a PDS data section is 0, the hardware won't run
2855           * it. We therefore don't need to branch when there is only a
2856           * texture OR a uniform update program.
2857           */
2858          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2859             uint32_t branch_address =
2860                MAX2(1 + program->num_texture_dma_kicks, 2);
2861 
2862             /* Use If0 to BRAnch to uniform code. */
2863             *instruction++ = pvr_pds_encode_bra(
2864                /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2865                /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2866                /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2867                /* ADDR */ branch_address);
2868          }
2869 
2870          code_size += 1;
2871       }
2872 
2873       if (program->num_texture_dma_kicks > 0) {
2874          uint32_t dma_address_constant64;
2875          uint32_t dma_control_constant32;
2876          /* Allocate 3 constant spaces for each kick. The 64-bit constants
2877           * come first followed by the 32-bit constants.
2878           */
2879          dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2880          dma_control_constant32 =
2881             dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2882 
2883          for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2884             code_size += 1;
2885             if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2886                continue;
2887 
2888             /* DMA the state into the secondary attributes. */
2889             *instruction++ = pvr_pds_encode_doutd(
2890                /* cc */ 0,
2891                /* END */ dma == (program->num_texture_dma_kicks - 1),
2892                /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2893                /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2894                                                          * 64-bit
2895                                                          * Src0
2896                                                          */
2897             dma_address_constant64 += 2;
2898             dma_control_constant32 += 1;
2899          }
2900       } else if (both_textures_and_uniforms) {
2901          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2902             /* End the program. */
2903             *instruction++ = pvr_pds_inst_encode_halt(0);
2904          }
2905 
2906          code_size += 1;
2907       }
2908 
2909       /* Reserve space at the beginning of the data segment for the DOUTU Task
2910        * Control if one is needed.
2911        */
2912       if (program->kick_usc) {
2913          doutu_constant64 =
2914             pvr_pds_get_constants(&next_constant, 2, &data_size);
2915       }
2916 
2917       /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2918        * 64-bit constants come first followed by the 32-bit constants.
2919        */
2920       uint32_t total_size_dma =
2921          program->num_dword_doutw + 2 * program->num_q_word_doutw;
2922 
2923       uint32_t dma_address_constant64 = pvr_pds_get_constants(
2924          &next_constant,
2925          program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2926          &data_size);
2927       uint32_t doutw_value_constant64 =
2928          dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2929       uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2930       uint32_t doutw_control_constant32 =
2931          dma_control_constant32 + program->num_uniform_dma_kicks;
2932 
2933       if (total_num_doutw > 0) {
2934          pvr_pds_get_constants(&next_constant, 0, &data_size);
2935 
2936          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2937             for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2938                /* Set the coefficient register to data value. */
2939                *instruction++ = pvr_pds_encode_doutw64(
2940                   /* cc */ 0,
2941                   /* END */ !program->num_uniform_dma_kicks &&
2942                      !program->kick_usc && (i == total_num_doutw - 1),
2943                   /* SRC1 */ doutw_control_constant32,
2944                   /* SRC0 */ doutw_value_constant64 >> 1);
2945 
2946                doutw_value_constant64 += 2;
2947                doutw_control_constant32 += 1;
2948             }
2949 
2950             for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2951                /* Set the coefficient register to data value. */
2952                *instruction++ = pvr_pds_encode_doutw64(
2953                   /* cc */ 0,
2954                   /* END */ !program->num_uniform_dma_kicks &&
2955                      !program->kick_usc && (i == program->num_dword_doutw - 1),
2956                   /* SRC1 */ doutw_control_constant32,
2957                   /* SRC0 */ doutw_value_constant64 >> 1);
2958 
2959                doutw_value_constant64 += 1;
2960                doutw_control_constant32 += 1;
2961             }
2962          }
2963          code_size += total_num_doutw;
2964       }
2965 
2966       if (program->num_uniform_dma_kicks > 0) {
2967          for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2968             code_size += 1;
2969 
2970             if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2971                continue;
2972 
2973             bool last_instruction = false;
2974             if (!program->kick_usc &&
2975                 (dma == program->num_uniform_dma_kicks - 1)) {
2976                last_instruction = true;
2977             }
2978             /* DMA the state into the secondary attributes. */
2979             *instruction++ = pvr_pds_encode_doutd(
2980                /* cc */ 0,
2981                /* END */ last_instruction,
2982                /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2983                                                    */
2984                /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2985                                                          * 64-bit
2986                                                          * Src0
2987                                                          */
2988             dma_address_constant64 += 2;
2989             dma_control_constant32 += 1;
2990          }
2991       }
2992 
2993       if (program->kick_usc) {
2994          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2995             /* Issue the task to the USC.
2996              *
2997              * dout ds1[constant_use], ds0[constant_use],
2998              * ds1[constant_use], emit
2999              */
3000 
3001             *instruction++ = pvr_pds_encode_doutu(
3002                /* cc */ 0,
3003                /* END */ 1,
3004                /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
3005          }
3006 
3007          code_size += 1;
3008       } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
3009          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3010             /* End the program. */
3011             *instruction++ = pvr_pds_inst_encode_halt(0);
3012          }
3013 
3014          code_size += 1;
3015       }
3016    }
3017 
3018    /* Minimum temp count is 1. */
3019    program->temps_used = MAX2(temps_used, 1);
3020    program->code_size = code_size;
3021 
3022    if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3023       return instruction;
3024    else
3025       return NULL;
3026 }
3027 
3028 /**
3029  * Writes the Uniform Data block for the PDS pixel shader secondary attributes
3030  * program.
3031  *
3032  * \param program Pointer to the PDS pixel shader secondary attributes program.
3033  * \param buffer Pointer to the buffer for the code/data.
3034  * \param gen_mode Either code or data can be generated or sizes only updated.
3035  * \param dev_info PVR device information struct.
3036  * \returns Pointer to just beyond the buffer for the program/data.
3037  */
pvr_pds_pixel_shader_uniform_texture_data(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,bool uniform,const struct pvr_device_info * dev_info)3038 uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
3039    struct pvr_pds_pixel_shader_sa_program *restrict program,
3040    uint32_t *restrict buffer,
3041    enum pvr_pds_generate_mode gen_mode,
3042    bool uniform,
3043    const struct pvr_device_info *dev_info)
3044 {
3045    uint32_t *constants = buffer;
3046    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3047    uint32_t temps_used = 0;
3048    uint32_t data_size = 0;
3049 
3050    assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
3051           0);
3052 
3053    assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
3054 
3055    /* Shape of data segment (note: clear is different).
3056     *
3057     *        Uniform            Texture
3058     *    +--------------+   +-------------+
3059     *    | USC Task   L |   | USC Task  L |
3060     *    |            H |   |           H |
3061     *    | DMA1 Src0  L |   | DMA1 Src0 L |
3062     *    |            H |   |           H |
3063     *    | DMA2 Src0  L |   |             |
3064     *    |            H |   |             |
3065     *    | DMA1 Src1    |   | DMA1 Src1   |
3066     *    | DMA2 Src1    |   |             |
3067     *    | DOUTW0 Src1  |   |             |
3068     *    | DOUTW1 Src1  |   |             |
3069     *    |   ...        |   |             |
3070     *    | DOUTWn Srcn  |   |             |
3071     *    | other data   |   |             |
3072     *    +--------------+   +-------------+
3073     */
3074 
3075    /* Generate the PDS pixel shader secondary attributes data.
3076     *
3077     * Packed Clear
3078     * The clear color can arrive packed in the right form in the first (or
3079     * first 2) dwords of the shared registers and the program will issue a
3080     * single DOUTW for this.
3081     */
3082    if (program->clear && uniform && program->packed_clear) {
3083       uint32_t color_constant1 =
3084          pvr_pds_get_constants(&next_constant, 2, &data_size);
3085 
3086       uint32_t control_word_constant1 =
3087          pvr_pds_get_constants(&next_constant, 2, &data_size);
3088 
3089       if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3090          uint32_t doutw;
3091 
3092          pvr_pds_write_constant64(constants,
3093                                   color_constant1,
3094                                   program->clear_color[0],
3095                                   program->clear_color[1]);
3096 
3097          /* Load into first constant in common store. */
3098          doutw = pvr_pds_encode_doutw_src1(
3099             program->clear_color_dest_reg,
3100             PVR_PDS_DOUTW_LOWER64,
3101             PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3102             false,
3103             dev_info);
3104 
3105          /* Set the last flag. */
3106          doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3107          pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
3108       }
3109    } else if (program->clear && uniform) {
3110       uint32_t color_constant1, color_constant2;
3111 
3112       if (program->clear_color_dest_reg & 0x1) {
3113          uint32_t color_constant3, control_word_constant1,
3114             control_word_constant2, color_constant4;
3115 
3116          color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3117          color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3118          color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3119 
3120          control_word_constant1 =
3121             pvr_pds_get_constants(&next_constant, 2, &data_size);
3122          control_word_constant2 =
3123             pvr_pds_get_constants(&next_constant, 2, &data_size);
3124          color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3125 
3126          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3127             uint32_t doutw;
3128 
3129             pvr_pds_write_constant32(constants,
3130                                      color_constant1,
3131                                      program->clear_color[0]);
3132 
3133             pvr_pds_write_constant64(constants,
3134                                      color_constant2,
3135                                      program->clear_color[1],
3136                                      program->clear_color[2]);
3137 
3138             pvr_pds_write_constant32(constants,
3139                                      color_constant3,
3140                                      program->clear_color[3]);
3141 
3142             /* Load into first constant in common store. */
3143             doutw = pvr_pds_encode_doutw_src1(
3144                program->clear_color_dest_reg,
3145                PVR_PDS_DOUTW_LOWER32,
3146                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3147                false,
3148                dev_info);
3149 
3150             pvr_pds_write_constant64(constants,
3151                                      control_word_constant1,
3152                                      doutw,
3153                                      0);
3154 
3155             /* Move the destination register along. */
3156             doutw = pvr_pds_encode_doutw_src1(
3157                program->clear_color_dest_reg + 1,
3158                PVR_PDS_DOUTW_LOWER64,
3159                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3160                false,
3161                dev_info);
3162 
3163             pvr_pds_write_constant64(constants,
3164                                      control_word_constant2,
3165                                      doutw,
3166                                      0);
3167 
3168             /* Move the destination register along. */
3169             doutw = pvr_pds_encode_doutw_src1(
3170                program->clear_color_dest_reg + 3,
3171                PVR_PDS_DOUTW_LOWER32,
3172                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3173                false,
3174                dev_info);
3175 
3176             /* Set the last flag. */
3177             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3178             pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
3179          }
3180       } else {
3181          uint32_t control_word_constant, control_word_last_constant;
3182 
3183          /* Put the clear color and control words into the first 8
3184           * constants.
3185           */
3186          color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3187          color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3188          control_word_constant =
3189             pvr_pds_get_constants(&next_constant, 2, &data_size);
3190          control_word_last_constant =
3191             pvr_pds_get_constants(&next_constant, 2, &data_size);
3192 
3193          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3194             uint32_t doutw;
3195             pvr_pds_write_constant64(constants,
3196                                      color_constant1,
3197                                      program->clear_color[0],
3198                                      program->clear_color[1]);
3199 
3200             pvr_pds_write_constant64(constants,
3201                                      color_constant2,
3202                                      program->clear_color[2],
3203                                      program->clear_color[3]);
3204 
3205             /* Load into first constant in common store. */
3206             doutw = pvr_pds_encode_doutw_src1(
3207                program->clear_color_dest_reg,
3208                PVR_PDS_DOUTW_LOWER64,
3209                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3210                false,
3211                dev_info);
3212 
3213             pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
3214 
3215             /* Move the destination register along. */
3216             doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
3217             doutw |= (program->clear_color_dest_reg + 2)
3218                      << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
3219 
3220             /* Set the last flag. */
3221             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3222             pvr_pds_write_constant64(constants,
3223                                      control_word_last_constant,
3224                                      doutw,
3225                                      0);
3226          }
3227       }
3228 
3229       /* Constants for the DOUTU Task Control, if needed. */
3230       if (program->kick_usc) {
3231          uint32_t doutu_constant64 =
3232             pvr_pds_get_constants(&next_constant, 2, &data_size);
3233 
3234          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3235             pvr_pds_write_wide_constant(
3236                constants,
3237                doutu_constant64,
3238                program->usc_task_control.src0); /* 64-bit
3239                                                  */
3240             /* Src0 */
3241          }
3242       }
3243    } else {
3244       if (uniform) {
3245          /* Reserve space at the beginning of the data segment for the DOUTU
3246           * Task Control if one is needed.
3247           */
3248          if (program->kick_usc) {
3249             uint32_t doutu_constant64 =
3250                pvr_pds_get_constants(&next_constant, 2, &data_size);
3251 
3252             if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3253                pvr_pds_write_wide_constant(
3254                   constants,
3255                   doutu_constant64,
3256                   program->usc_task_control.src0); /* 64-bit Src0 */
3257             }
3258          }
3259 
3260          uint32_t total_num_doutw =
3261             program->num_dword_doutw + program->num_q_word_doutw;
3262          uint32_t total_size_dma =
3263             program->num_dword_doutw + 2 * program->num_q_word_doutw;
3264 
3265          /* Allocate 3 constant spaces for each kick. The 64-bit constants
3266           * come first followed by the 32-bit constants.
3267           */
3268          uint32_t dma_address_constant64 =
3269             pvr_pds_get_constants(&next_constant,
3270                                   program->num_uniform_dma_kicks * 3 +
3271                                      total_size_dma + total_num_doutw,
3272                                   &data_size);
3273          uint32_t doutw_value_constant64 =
3274             dma_address_constant64 + program->num_uniform_dma_kicks * 2;
3275          uint32_t dma_control_constant32 =
3276             doutw_value_constant64 + total_size_dma;
3277          uint32_t doutw_control_constant32 =
3278             dma_control_constant32 + program->num_uniform_dma_kicks;
3279 
3280          if (total_num_doutw > 0) {
3281             if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3282                for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
3283                   pvr_pds_write_constant64(
3284                      constants,
3285                      doutw_value_constant64,
3286                      program->q_word_doutw_value[2 * i],
3287                      program->q_word_doutw_value[2 * i + 1]);
3288                   pvr_pds_write_constant32(
3289                      constants,
3290                      doutw_control_constant32,
3291                      program->q_word_doutw_control[i] |
3292                         ((!program->num_uniform_dma_kicks &&
3293                           i == total_num_doutw - 1)
3294                             ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3295                             : 0));
3296 
3297                   doutw_value_constant64 += 2;
3298                   doutw_control_constant32 += 1;
3299                }
3300 
3301                for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
3302                   pvr_pds_write_constant32(constants,
3303                                            doutw_value_constant64,
3304                                            program->dword_doutw_value[i]);
3305                   pvr_pds_write_constant32(
3306                      constants,
3307                      doutw_control_constant32,
3308                      program->dword_doutw_control[i] |
3309                         ((!program->num_uniform_dma_kicks &&
3310                           i == program->num_dword_doutw - 1)
3311                             ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3312                             : 0));
3313 
3314                   doutw_value_constant64 += 1;
3315                   doutw_control_constant32 += 1;
3316                }
3317             }
3318          }
3319 
3320          if (program->num_uniform_dma_kicks > 0) {
3321             uint32_t kick;
3322 
3323             if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3324                for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
3325                     kick++) {
3326                   /* Copy the dma control words to constants. */
3327                   pvr_pds_write_dma_address(constants,
3328                                             dma_address_constant64,
3329                                             program->uniform_dma_address[kick],
3330                                             false,
3331                                             dev_info);
3332                   pvr_pds_write_constant32(constants,
3333                                            dma_control_constant32,
3334                                            program->uniform_dma_control[kick]);
3335 
3336                   dma_address_constant64 += 2;
3337                   dma_control_constant32 += 1;
3338                }
3339 
3340                pvr_pds_write_dma_address(constants,
3341                                          dma_address_constant64,
3342                                          program->uniform_dma_address[kick],
3343                                          false,
3344                                          dev_info);
3345                pvr_pds_write_constant32(
3346                   constants,
3347                   dma_control_constant32,
3348                   program->uniform_dma_control[kick] |
3349                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3350             }
3351          }
3352 
3353       } else if (program->num_texture_dma_kicks > 0) {
3354          /* Allocate 3 constant spaces for each kick. The 64-bit constants
3355           * come first followed by the 32-bit constants.
3356           */
3357          uint32_t dma_address_constant64 =
3358             pvr_pds_get_constants(&next_constant,
3359                                   program->num_texture_dma_kicks * 3,
3360                                   &data_size);
3361          uint32_t dma_control_constant32 =
3362             dma_address_constant64 + (program->num_texture_dma_kicks * 2);
3363 
3364          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3365             uint32_t kick;
3366             for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
3367                /* Copy the DMA control words to constants. */
3368                pvr_pds_write_dma_address(constants,
3369                                          dma_address_constant64,
3370                                          program->texture_dma_address[kick],
3371                                          false,
3372                                          dev_info);
3373 
3374                pvr_pds_write_constant32(constants,
3375                                         dma_control_constant32,
3376                                         program->texture_dma_control[kick]);
3377 
3378                dma_address_constant64 += 2;
3379                dma_control_constant32 += 1;
3380             }
3381 
3382             pvr_pds_write_dma_address(constants,
3383                                       dma_address_constant64,
3384                                       program->texture_dma_address[kick],
3385                                       false,
3386                                       dev_info);
3387 
3388             pvr_pds_write_constant32(
3389                constants,
3390                dma_control_constant32,
3391                program->texture_dma_control[kick] |
3392                   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3393          }
3394       }
3395    }
3396 
3397    /* Save the data segment pointer and size. */
3398    program->data_segment = constants;
3399 
3400    /* Minimum temp count is 1. */
3401    program->temps_used = MAX2(temps_used, 1);
3402    program->data_size = data_size;
3403 
3404    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3405       return (constants + next_constant);
3406    else
3407       return NULL;
3408 }
3409 
3410 /**
3411  * Generates generic DOUTC PDS program.
3412  *
3413  * \param program Pointer to the PDS kick USC.
3414  * \param buffer Pointer to the buffer for the program.
3415  * \param gen_mode Either code and data can be generated, or sizes only updated.
3416  * \returns Pointer to just beyond the buffer for the code or program segment.
3417  */
pvr_pds_generate_doutc(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3418 uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3419                                  uint32_t *restrict buffer,
3420                                  enum pvr_pds_generate_mode gen_mode)
3421 {
3422    uint32_t constant = 0;
3423 
3424    /* Automatically get a data size of 1x 128bit chunks. */
3425    uint32_t data_size = 0, code_size = 0;
3426 
3427    /* Setup the data part. */
3428    uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3429    uint32_t *instruction = buffer;
3430    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3431                                                            * dwords.
3432                                                            */
3433 
3434    /* Update the program sizes. */
3435    program->data_size = data_size;
3436    program->code_size = code_size;
3437    program->data_segment = constants;
3438 
3439    if (gen_mode == PDS_GENERATE_SIZES)
3440       return NULL;
3441 
3442    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3443       /* Copy the USC task control words to constants. */
3444 
3445       constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3446       pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3447                                                                 * Src0
3448                                                                 */
3449 
3450       uint32_t control_word_constant =
3451          pvr_pds_get_constants(&next_constant, 2, &data_size);
3452       pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3453                                                                          * Src1
3454                                                                          */
3455 
3456       program->data_size = data_size;
3457       buffer += data_size;
3458 
3459       return buffer;
3460    } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3461       *instruction++ = pvr_pds_inst_encode_doutc(
3462          /* cc */ 0,
3463          /* END */ 0);
3464 
3465       code_size++;
3466 
3467       /* End the program. */
3468       *instruction++ = pvr_pds_inst_encode_halt(0);
3469       code_size++;
3470 
3471       program->code_size = code_size;
3472    }
3473 
3474    return instruction;
3475 }
3476 
3477 /**
3478  * Generates generic kick DOUTU PDS program in a single data+code block.
3479  *
3480  * \param control Pointer to the PDS kick USC.
3481  * \param buffer Pointer to the buffer for the program.
3482  * \param gen_mode Either code and data can be generated or sizes only updated.
3483  * \param dev_info PVR device information structure.
3484  * \returns Pointer to just beyond the buffer for the code or program segment.
3485  */
pvr_pds_generate_doutw(struct pvr_pds_doutw_control * restrict control,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3486 uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3487                                  uint32_t *restrict buffer,
3488                                  enum pvr_pds_generate_mode gen_mode,
3489                                  const struct pvr_device_info *dev_info)
3490 {
3491    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3492    uint32_t doutw;
3493    uint32_t data_size = 0, code_size = 0;
3494    uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3495    uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3496 
3497    /* Assert if buffer is exceeded. */
3498    assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3499 
3500    uint32_t *constants = buffer;
3501    uint32_t *instruction = buffer;
3502 
3503    /* Put the constants and control words interleaved in the data region. */
3504    for (uint32_t const_pair = 0; const_pair < control->num_const64;
3505         const_pair++) {
3506       constant[const_pair] =
3507          pvr_pds_get_constants(&next_constant, 2, &data_size);
3508       control_word_constant[const_pair] =
3509          pvr_pds_get_constants(&next_constant, 2, &data_size);
3510    }
3511 
3512    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3513       /* Data segment points to start of constants. */
3514       control->data_segment = constants;
3515 
3516       for (uint32_t const_pair = 0; const_pair < control->num_const64;
3517            const_pair++) {
3518          pvr_pds_write_constant64(constants,
3519                                   constant[const_pair],
3520                                   H32(control->doutw_data[const_pair]),
3521                                   L32(control->doutw_data[const_pair]));
3522 
3523          /* Start loading at offset 0. */
3524          if (control->dest_store == PDS_COMMON_STORE) {
3525             doutw = pvr_pds_encode_doutw_src1(
3526                (2 * const_pair),
3527                PVR_PDS_DOUTW_LOWER64,
3528                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3529                false,
3530                dev_info);
3531          } else {
3532             doutw = pvr_pds_encode_doutw_src1(
3533                (2 * const_pair),
3534                PVR_PDS_DOUTW_LOWER64,
3535                PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3536                false,
3537                dev_info);
3538          }
3539 
3540          if (const_pair + 1 == control->num_const64) {
3541             /* Set the last flag for the MCU (assume there are no following
3542              * DOUTD's).
3543              */
3544             doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3545          }
3546          pvr_pds_write_constant64(constants,
3547                                   control_word_constant[const_pair],
3548                                   doutw,
3549                                   0);
3550       }
3551 
3552       control->data_size = data_size;
3553    } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3554       /* Code section. */
3555 
3556       for (uint32_t const_pair = 0; const_pair < control->num_const64;
3557            const_pair++) {
3558          /* DOUTW the PDS data to the USC constants. */
3559          *instruction++ = pvr_pds_encode_doutw64(
3560             /* cc */ 0,
3561             /* END */ control->last_instruction &&
3562                (const_pair + 1 == control->num_const64),
3563             /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3564                                                            * Src1.
3565                                                            */
3566             /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3567 
3568          code_size++;
3569       }
3570 
3571       if (control->last_instruction) {
3572          /* End the program. */
3573          *instruction++ = pvr_pds_inst_encode_halt(0);
3574          code_size++;
3575       }
3576 
3577       control->code_size = code_size;
3578    }
3579 
3580    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3581       return (constants + next_constant);
3582    else
3583       return instruction;
3584 }
3585 
3586 /**
3587  * Generates generic kick DOUTU PDS program in a single data+code block.
3588  *
3589  * \param program Pointer to the PDS kick USC.
3590  * \param buffer Pointer to the buffer for the program.
3591  * \param start_next_constant Next constant in data segment. Non-zero if another
3592  *                            instruction precedes the DOUTU.
3593  * \param cc_enabled If true then the DOUTU is predicated (cc set).
3594  * \param gen_mode Either code and data can be generated or sizes only updated.
3595  * \returns Pointer to just beyond the buffer for the code or program segment.
3596  */
pvr_pds_kick_usc(struct pvr_pds_kickusc_program * restrict program,uint32_t * restrict buffer,uint32_t start_next_constant,bool cc_enabled,enum pvr_pds_generate_mode gen_mode)3597 uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3598                            uint32_t *restrict buffer,
3599                            uint32_t start_next_constant,
3600                            bool cc_enabled,
3601                            enum pvr_pds_generate_mode gen_mode)
3602 {
3603    uint32_t constant = 0;
3604 
3605    /* Automatically get a data size of 2 128bit chunks. */
3606    uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3607    uint32_t code_size = 1; /* Single doutu */
3608    uint32_t dummy_count = 0;
3609 
3610    /* Setup the data part. */
3611    uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3612    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3613                                                            * dwords.
3614                                                            */
3615 
3616    /* Update the program sizes. */
3617    program->data_size = data_size;
3618    program->code_size = code_size;
3619    program->data_segment = constants;
3620 
3621    if (gen_mode == PDS_GENERATE_SIZES)
3622       return NULL;
3623 
3624    if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3625        gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3626       /* Copy the USC task control words to constants. */
3627 
3628       constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3629 
3630       pvr_pds_write_wide_constant(constants,
3631                                   constant + 0,
3632                                   program->usc_task_control.src0); /* 64-bit
3633                                                                     * Src0.
3634                                                                     */
3635       buffer += data_size;
3636 
3637       if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3638          return buffer;
3639    }
3640 
3641    if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3642        gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3643       /* Generate the PDS pixel shader code. */
3644 
3645       /* Setup the instruction pointer. */
3646       uint32_t *instruction = buffer;
3647 
3648       /* Issue the task to the USC.
3649        *
3650        * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3651        * halt halt
3652        */
3653 
3654       *instruction++ = pvr_pds_encode_doutu(
3655          /* cc */ cc_enabled,
3656          /* END */ 1,
3657          /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3658                                                              * 64-bit Src0
3659                                                              */
3660 
3661       /* Return pointer to just after last instruction. */
3662       return instruction;
3663    }
3664 
3665    /* Execution should never reach here; keep compiler happy. */
3666    return NULL;
3667 }
3668 
pvr_pds_generate_compute_barrier_conditional(uint32_t * buffer,enum pvr_pds_generate_mode gen_mode)3669 uint32_t *pvr_pds_generate_compute_barrier_conditional(
3670    uint32_t *buffer,
3671    enum pvr_pds_generate_mode gen_mode)
3672 {
3673    /* Compute barriers supported. Need to test for coeff sync task. */
3674 
3675    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3676       return buffer; /* No data segment. */
3677 
3678    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3679       /* Test whether this is the coefficient update task or not. */
3680       *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3681                                                                        */
3682                                      PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3683                                                                          */
3684                                      PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3685                                                                        */
3686                                      1 /* ADDR */);
3687 
3688       /* Encode a HALT. */
3689       *buffer++ = pvr_pds_inst_encode_halt(1);
3690 
3691       /* Reset the default predicate to IF0. */
3692       *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3693                                                                        */
3694                                      PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3695                                                                          */
3696                                      PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3697                                                                        */
3698                                      1 /* ADDR */);
3699    }
3700 
3701    return buffer;
3702 }
3703 
3704 /**
3705  * Generates program to kick the USC task to store shared.
3706  *
3707  * \param program Pointer to the PDS shared register.
3708  * \param buffer Pointer to the buffer for the program.
3709  * \param gen_mode Either code and data can be generated or sizes only updated.
3710  * \param dev_info PVR device information structure.
3711  * \returns Pointer to just beyond the buffer for the program.
3712  */
pvr_pds_generate_shared_storing_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3713 uint32_t *pvr_pds_generate_shared_storing_program(
3714    struct pvr_pds_shared_storing_program *restrict program,
3715    uint32_t *restrict buffer,
3716    enum pvr_pds_generate_mode gen_mode,
3717    const struct pvr_device_info *dev_info)
3718 {
3719    struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3720    struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3721 
3722    if (gen_mode == PDS_GENERATE_SIZES)
3723       return NULL;
3724 
3725    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3726       uint32_t *constants = buffer;
3727 
3728       constants =
3729          pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3730       program->data_size = doutw_control->data_size;
3731 
3732       constants = pvr_pds_kick_usc(kick_usc_program,
3733                                    constants,
3734                                    0,
3735                                    program->cc_enable,
3736                                    gen_mode);
3737       program->data_size += kick_usc_program->data_size;
3738 
3739       return constants;
3740    }
3741 
3742    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3743       /* Generate PDS code segment. */
3744       uint32_t *instruction = buffer;
3745 
3746       /* doutw	vi1, vi0
3747        * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3748        * emit
3749        */
3750       instruction =
3751          pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3752       program->code_size = doutw_control->code_size;
3753 
3754       /* Offset into data segment follows on from doutw data segment. */
3755       instruction = pvr_pds_kick_usc(kick_usc_program,
3756                                      instruction,
3757                                      doutw_control->data_size,
3758                                      program->cc_enable,
3759                                      gen_mode);
3760       program->code_size += kick_usc_program->code_size;
3761 
3762       return instruction;
3763    }
3764 
3765    /* Execution should never reach here. */
3766    return NULL;
3767 }
3768 
pvr_pds_generate_fence_terminate_program(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3769 uint32_t *pvr_pds_generate_fence_terminate_program(
3770    struct pvr_pds_fence_program *restrict program,
3771    uint32_t *restrict buffer,
3772    enum pvr_pds_generate_mode gen_mode,
3773    const struct pvr_device_info *dev_info)
3774 {
3775    uint32_t data_size = 0;
3776    uint32_t code_size = 0;
3777 
3778    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3779       /* Data segment. */
3780       uint32_t *constants, *constants_base;
3781 
3782       constants = constants_base = (uint32_t *)buffer;
3783 
3784       /* DOUTC sources are not used, but they must be valid. */
3785       pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3786       data_size += program->data_size;
3787 
3788       if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3789          /* Append a 64-bit constant with value 1. Used to increment ptemp.
3790           * Return the offset into the data segment.
3791           */
3792          program->fence_constant_word =
3793             pvr_pds_append_constant64(constants_base, 1, &data_size);
3794       }
3795 
3796       program->data_size = data_size;
3797       return constants;
3798    }
3799 
3800    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3801       /* Code segment. */
3802       uint32_t *instruction = (uint32_t *)buffer;
3803 
3804       instruction = pvr_pds_generate_compute_barrier_conditional(
3805          instruction,
3806          PDS_GENERATE_CODE_SEGMENT);
3807       code_size += 3;
3808 
3809       if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3810          /* lock */
3811          *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3812 
3813          /* add64	pt[0], pt[0], #1 */
3814          *instruction++ = pvr_pds_inst_encode_add64(
3815             0, /* cc */
3816             PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3817             PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3818             PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3819                                                          */
3820             PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3821                (program->fence_constant_word >> 1), /* src1 = 1 */
3822             PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3823                                                             * ptemp[0]
3824                                                             */
3825 
3826          /* release */
3827          *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3828 
3829          /* cmp		pt[0] EQ 0x4 == Number of USC clusters per phantom */
3830          *instruction++ = pvr_pds_inst_encode_cmpi(
3831             0, /* cc */
3832             PVR_ROGUE_PDSINST_COP_EQ,
3833             PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3834                                                            * = ptemp[0]
3835                                                            */
3836             PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3837 
3838          /* bra		-1 */
3839          *instruction++ =
3840             pvr_pds_encode_bra(0, /* cc */
3841                                1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3842                                    */
3843                                0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3844                                    */
3845                                -1); /* bra PC */
3846          code_size += 5;
3847       }
3848 
3849       /* DOUTC */
3850       instruction = pvr_pds_generate_doutc(program,
3851                                            instruction,
3852                                            PDS_GENERATE_CODE_SEGMENT);
3853       code_size += program->code_size;
3854 
3855       program->code_size = code_size;
3856       return instruction;
3857    }
3858 
3859    /* Execution should never reach here. */
3860    return NULL;
3861 }
3862 
3863 /**
3864  * Generates program to kick the USC task to load shared registers from memory.
3865  *
3866  * \param program Pointer to the PDS shared register.
3867  * \param buffer Pointer to the buffer for the program.
3868  * \param gen_mode Either code and data can be generated or sizes only updated.
3869  * \param dev_info PVR device information struct.
3870  * \returns Pointer to just beyond the buffer for the program.
3871  */
pvr_pds_generate_compute_shared_loading_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3872 uint32_t *pvr_pds_generate_compute_shared_loading_program(
3873    struct pvr_pds_shared_storing_program *restrict program,
3874    uint32_t *restrict buffer,
3875    enum pvr_pds_generate_mode gen_mode,
3876    const struct pvr_device_info *dev_info)
3877 {
3878    struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3879    struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3880 
3881    uint32_t next_constant;
3882    uint32_t data_size = 0;
3883    uint32_t code_size = 0;
3884 
3885    /* This needs to persist to the CODE_SEGMENT call. */
3886    static uint32_t fence_constant_word = 0;
3887    uint64_t zero_constant64 = 0;
3888 
3889    if (gen_mode == PDS_GENERATE_SIZES)
3890       return NULL;
3891 
3892    if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3893       uint32_t *constants = buffer;
3894 
3895       constants = pvr_pds_generate_doutw(doutw_control,
3896                                          constants,
3897                                          PDS_GENERATE_DATA_SEGMENT,
3898                                          dev_info);
3899       data_size += doutw_control->data_size;
3900 
3901       constants = pvr_pds_kick_usc(kick_usc_program,
3902                                    constants,
3903                                    0,
3904                                    program->cc_enable,
3905                                    gen_mode);
3906       data_size += kick_usc_program->data_size;
3907 
3908       /* Copy the fence constant value (64-bit). */
3909       next_constant = data_size; /* Assumes data words fully packed. */
3910       fence_constant_word =
3911          pvr_pds_get_constants(&next_constant, 2, &data_size);
3912 
3913       /* Encode the fence constant src0 (offset measured from start of data
3914        * buffer). Fence barrier is initialized to zero.
3915        */
3916       pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3917       /* Update the const size. */
3918       data_size += 2;
3919       constants += 2;
3920 
3921       program->data_size = data_size;
3922       return constants;
3923    }
3924 
3925    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3926       /* Generate PDS code segment. */
3927       uint32_t *instruction = buffer;
3928 
3929       /* add64	pt0, c0, c0
3930        * IF [2x Phantoms]
3931        * add64	pt1, c0, c0
3932        * st		[constant_mem_addr], pt0, 4
3933        * ENDIF
3934        * doutw	vi1, vi0
3935        * doutu	ds1[constant_use], ds0[constant_use], ds1[constant_use],
3936        * emit
3937        *
3938        * Zero the persistent temp (SW fence for context switch).
3939        */
3940       *instruction++ = pvr_pds_inst_encode_add64(
3941          0, /* cc */
3942          PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3943          PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3944          PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3945             (fence_constant_word >> 1), /* src0
3946                                          *  = 0
3947                                          */
3948          PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3949             (fence_constant_word >> 1), /* src1
3950                                          * = 0
3951                                          */
3952          PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3953                                                          */
3954       code_size++;
3955 
3956       instruction = pvr_pds_generate_doutw(doutw_control,
3957                                            instruction,
3958                                            PDS_GENERATE_CODE_SEGMENT,
3959                                            dev_info);
3960       code_size += doutw_control->code_size;
3961 
3962       /* Offset into data segment follows on from doutw data segment. */
3963       instruction = pvr_pds_kick_usc(kick_usc_program,
3964                                      instruction,
3965                                      doutw_control->data_size,
3966                                      program->cc_enable,
3967                                      gen_mode);
3968       code_size += kick_usc_program->code_size;
3969 
3970       program->code_size = code_size;
3971       return instruction;
3972    }
3973 
3974    /* Execution should never reach here. */
3975    return NULL;
3976 }
3977 
3978 /**
3979  * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3980  * Relies on num_fpu_iterators being initialized for size calculation.
3981  * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3982  * initialized for program generation.
3983  *
3984  * \param program Pointer to the PDS pixel shader program.
3985  * \param buffer Pointer to the buffer for the program.
3986  * \param gen_mode Either code and data can be generated or sizes only updated.
3987  * \returns Pointer to just beyond the buffer for the program.
3988  */
pvr_pds_coefficient_loading(struct pvr_pds_coeff_loading_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3989 uint32_t *pvr_pds_coefficient_loading(
3990    struct pvr_pds_coeff_loading_program *restrict program,
3991    uint32_t *restrict buffer,
3992    enum pvr_pds_generate_mode gen_mode)
3993 {
3994    uint32_t constant;
3995    uint32_t *instruction;
3996    uint32_t total_data_size, code_size;
3997 
3998    /* Place constants at the front of the buffer. */
3999    uint32_t *constants = buffer;
4000    /* Start counting constants from 0. */
4001    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4002 
4003    /* Save the data segment pointer and size. */
4004    program->data_segment = constants;
4005 
4006    total_data_size = 0;
4007    code_size = 0;
4008 
4009    total_data_size += 2 * program->num_fpu_iterators;
4010    code_size += program->num_fpu_iterators;
4011 
4012    /* Instructions start where constants finished, but we must take note of
4013     * alignment.
4014     *
4015     * 128-bit boundary = 4 dwords.
4016     */
4017    total_data_size = ALIGN_POT(total_data_size, 4);
4018    if (gen_mode != PDS_GENERATE_SIZES) {
4019       uint32_t data_size = 0;
4020       uint32_t iterator = 0;
4021 
4022       instruction = buffer + total_data_size;
4023 
4024       while (iterator < program->num_fpu_iterators) {
4025          uint64_t iterator_word;
4026 
4027          /* Copy the USC task control words to constants. */
4028          constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4029 
4030          /* Write the first iterator. */
4031          iterator_word =
4032             (uint64_t)program->FPU_iterators[iterator]
4033             << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4034 
4035          /* Write the destination. */
4036          iterator_word |=
4037             (uint64_t)program->destination[iterator++]
4038             << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4039 
4040          /* If this is the last DOUTI word the "Last Issue" bit should be
4041           * set.
4042           */
4043          if (iterator >= program->num_fpu_iterators) {
4044             iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4045          }
4046 
4047          /* Write the word to the buffer. */
4048          pvr_pds_write_wide_constant(constants,
4049                                      constant,
4050                                      iterator_word); /* 64-bit
4051                                                         Src0
4052                                                       */
4053 
4054          /* Write the DOUT instruction. */
4055          *instruction++ = pvr_pds_encode_douti(
4056             /* cc */ 0,
4057             /* END */ 0,
4058             /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4059       }
4060 
4061       /* Update the last DOUTI instruction to have the END flag set. */
4062       *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4063    } else {
4064       instruction = NULL;
4065    }
4066 
4067    /* Update the data size and code size. Minimum temp count is 1. */
4068    program->temps_used = 1;
4069    program->data_size = total_data_size;
4070    program->code_size = code_size;
4071 
4072    return instruction;
4073 }
4074 
4075 /**
4076  * Generate a single ld/st instruction. This can correspond to one or more
4077  * real ld/st instructions based on the value of count.
4078  *
4079  * \param ld true to generate load, false to generate store.
4080  * \param control Cache mode control.
4081  * \param temp_index Dest temp for load/source temp for store, in 32bits
4082  *                   register index.
4083  * \param address Source for load/dest for store in bytes.
4084  * \param count Number of dwords for load/store.
4085  * \param next_constant
4086  * \param total_data_size
4087  * \param total_code_size
4088  * \param buffer Pointer to the buffer for the program.
4089  * \param data_fence Issue data fence.
4090  * \param gen_mode Either code and data can be generated or sizes only updated.
4091  * \param dev_info PVR device information structure.
4092  * \returns Pointer to just beyond the buffer for the program.
4093  */
pvr_pds_generate_single_ldst_instruction(bool ld,const struct pvr_pds_ldst_control * control,uint32_t temp_index,uint64_t address,uint32_t count,uint32_t * next_constant,uint32_t * total_data_size,uint32_t * total_code_size,uint32_t * restrict buffer,bool data_fence,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4094 uint32_t *pvr_pds_generate_single_ldst_instruction(
4095    bool ld,
4096    const struct pvr_pds_ldst_control *control,
4097    uint32_t temp_index,
4098    uint64_t address,
4099    uint32_t count,
4100    uint32_t *next_constant,
4101    uint32_t *total_data_size,
4102    uint32_t *total_code_size,
4103    uint32_t *restrict buffer,
4104    bool data_fence,
4105    enum pvr_pds_generate_mode gen_mode,
4106    const struct pvr_device_info *dev_info)
4107 {
4108    /* A single ld/ST here does NOT actually correspond to a single ld/ST
4109     * instruction, but may needs multiple ld/ST instructions because each ld/ST
4110     * instruction can only ld/ST a restricted max number of dwords which may
4111     * less than count passed here.
4112     */
4113 
4114    uint32_t num_inst;
4115    uint32_t constant;
4116 
4117    if (ld) {
4118       /* ld must operate on 64bits unit, and it needs to load from and to 128
4119        * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4120        * 1, 2, ...) times 64bits unit.
4121        */
4122       uint32_t per_inst_count = 0;
4123       uint32_t last_inst_count;
4124 
4125       assert((gen_mode == PDS_GENERATE_SIZES) ||
4126              (((count % 2) == 0) && ((address % 16) == 0) &&
4127               (temp_index % 2) == 0));
4128 
4129       count >>= 1;
4130       temp_index >>= 1;
4131 
4132       /* Found out how many ld instructions are needed and ld size for the all
4133        * possible ld instructions.
4134        */
4135       if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4136          num_inst = 1;
4137          last_inst_count = count;
4138       } else {
4139          per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4140          if ((per_inst_count % 2) != 0)
4141             per_inst_count -= 1;
4142 
4143          num_inst = count / per_inst_count;
4144          last_inst_count = count - per_inst_count * num_inst;
4145          num_inst += 1;
4146       }
4147 
4148       /* Generate all the instructions. */
4149       for (uint32_t i = 0; i < num_inst; i++) {
4150          if ((i == (num_inst - 1)) && (last_inst_count == 0))
4151             break;
4152 
4153          /* A single load instruction. */
4154          constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4155 
4156          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4157             uint64_t ld_src0 = 0;
4158 
4159             ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4160                         << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4161             ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4162                                                         : per_inst_count) &
4163                          PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4164                         << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4165             ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4166                         << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4167 
4168             if (!control) {
4169                ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4170 
4171                if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4172                   ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4173 
4174             } else {
4175                ld_src0 |= control->cache_control_const;
4176             }
4177 
4178             /* Write it to the constant. */
4179             pvr_pds_write_constant64(buffer,
4180                                      constant,
4181                                      (uint32_t)(ld_src0),
4182                                      (uint32_t)(ld_src0 >> 32));
4183 
4184             /* Adjust value for next ld instruction. */
4185             temp_index += per_inst_count;
4186             address += (((uint64_t)(per_inst_count)) << 3);
4187          }
4188 
4189          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4190             *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4191 
4192             if (data_fence)
4193                *buffer++ = pvr_pds_inst_encode_wdf(0);
4194          }
4195       }
4196    } else {
4197       /* ST needs source memory address to be 32bits aligned. */
4198       assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4199 
4200       /* Found out how many ST instructions are needed, each ST can only store
4201        * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4202        */
4203       num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4204       num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4205 
4206       /* Generate all the instructions. */
4207       for (uint32_t i = 0; i < num_inst; i++) {
4208          /* A single store instruction. */
4209          constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4210 
4211          if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4212             uint32_t per_inst_count =
4213                (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4214                    ? count
4215                    : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4216             uint64_t st_src0 = 0;
4217 
4218             st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4219                         << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4220             st_src0 |=
4221                (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4222                 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4223             st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4224                         << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4225 
4226             if (!control) {
4227                st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4228 
4229                if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4230                   st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4231                }
4232 
4233             } else {
4234                st_src0 |= control->cache_control_const;
4235             }
4236 
4237             /* Write it to the constant. */
4238             pvr_pds_write_constant64(buffer,
4239                                      constant,
4240                                      (uint32_t)(st_src0),
4241                                      (uint32_t)(st_src0 >> 32));
4242 
4243             /* Adjust value for next ST instruction. */
4244             temp_index += per_inst_count;
4245             count -= per_inst_count;
4246             address += (((uint64_t)(per_inst_count)) << 2);
4247          }
4248 
4249          if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4250             *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4251 
4252             if (data_fence)
4253                *buffer++ = pvr_pds_inst_encode_wdf(0);
4254          }
4255       }
4256    }
4257 
4258    (*total_code_size) += num_inst;
4259    if (data_fence)
4260       (*total_code_size) += num_inst;
4261 
4262    if (gen_mode != PDS_GENERATE_SIZES)
4263       return buffer;
4264    return NULL;
4265 }
4266 
4267 /**
4268  * Generate programs used to prepare stream out, i.e., clear stream out buffer
4269  * overflow flags and update Persistent temps by a ld instruction.
4270  *
4271  * This must be used in PPP state update.
4272  *
4273  * \param program Pointer to the stream out program.
4274  * \param buffer Pointer to the buffer for the program.
4275  * \param store_mode If true then the data is stored to memory. If false then
4276  *                   the data is loaded from memory.
4277  * \param gen_mode Either code and data can be generated or sizes only updated.
4278  * \param dev_info PVR device information structure.
4279  * \returns Pointer to just beyond the buffer for the program.
4280  */
pvr_pds_generate_stream_out_init_program(struct pvr_pds_stream_out_init_program * restrict program,uint32_t * restrict buffer,bool store_mode,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4281 uint32_t *pvr_pds_generate_stream_out_init_program(
4282    struct pvr_pds_stream_out_init_program *restrict program,
4283    uint32_t *restrict buffer,
4284    bool store_mode,
4285    enum pvr_pds_generate_mode gen_mode,
4286    const struct pvr_device_info *dev_info)
4287 {
4288    uint32_t total_data_size = 0;
4289    uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4290 
4291    /* Start counting constants from 0. */
4292    uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4293 
4294    uint32_t total_code_size = 1;
4295 
4296    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4297       /* We only need to clear global stream out predicate, other predicates
4298        * are not used during the stream out buffer overflow test.
4299        */
4300       *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4301    }
4302 
4303    for (uint32_t index = 0; index < program->num_buffers; index++) {
4304       if (program->dev_address_for_buffer_data[index] != 0) {
4305          /* Generate load/store program to load/store persistent temps. */
4306 
4307          /* NOTE: store_mode == true case should be handled by
4308           * StreamOutTerminate.
4309           */
4310          buffer = pvr_pds_generate_single_ldst_instruction(
4311             !store_mode,
4312             NULL,
4313             PTDst,
4314             program->dev_address_for_buffer_data[index],
4315             program->pds_buffer_data_size[index],
4316             &next_constant,
4317             &total_data_size,
4318             &total_code_size,
4319             buffer,
4320             false,
4321             gen_mode,
4322             dev_info);
4323       }
4324 
4325       PTDst += program->pds_buffer_data_size[index];
4326    }
4327 
4328    total_code_size += 2;
4329 
4330    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4331       /* We need to fence the loading. */
4332       *buffer++ = pvr_pds_inst_encode_wdf(0);
4333       *buffer++ = pvr_pds_inst_encode_halt(0);
4334    }
4335 
4336    /* Save size information to program */
4337    program->stream_out_init_pds_data_size =
4338       ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4339    /* PDS program code size. */
4340    program->stream_out_init_pds_code_size = total_code_size;
4341 
4342    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4343       return buffer + program->stream_out_init_pds_data_size;
4344    else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4345       return buffer;
4346 
4347    return NULL;
4348 }
4349 
4350 /**
4351  * Generate stream out terminate program for stream out.
4352  *
4353  * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4354  * will be stored.
4355  *
4356  * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4357  * will be stored into memory.
4358  *
4359  * The stream out terminate program is used to update the PPP state and the data
4360  * and code section cannot be separate.
4361  *
4362  * \param program Pointer to the stream out program.
4363  * \param buffer Pointer to the buffer for the program.
4364  * \param gen_mode Either code and data can be generated or sizes only updated.
4365  * \param dev_info PVR device info structure.
4366  * \returns Pointer to just beyond the buffer for the program.
4367  */
pvr_pds_generate_stream_out_terminate_program(struct pvr_pds_stream_out_terminate_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4368 uint32_t *pvr_pds_generate_stream_out_terminate_program(
4369    struct pvr_pds_stream_out_terminate_program *restrict program,
4370    uint32_t *restrict buffer,
4371    enum pvr_pds_generate_mode gen_mode,
4372    const struct pvr_device_info *dev_info)
4373 {
4374    uint32_t next_constant;
4375    uint32_t total_data_size = 0, total_code_size = 0;
4376 
4377    /* Start counting constants from 0. */
4378    next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4379 
4380    /* Generate store program to store persistent temps. */
4381    buffer = pvr_pds_generate_single_ldst_instruction(
4382       false,
4383       NULL,
4384       PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4385       program->dev_address_for_storing_persistent_temp,
4386       program->pds_persistent_temp_size_to_store,
4387       &next_constant,
4388       &total_data_size,
4389       &total_code_size,
4390       buffer,
4391       false,
4392       gen_mode,
4393       dev_info);
4394 
4395    total_code_size += 2;
4396    if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4397       *buffer++ = pvr_pds_inst_encode_wdf(0);
4398       *buffer++ = pvr_pds_inst_encode_halt(0);
4399    }
4400 
4401    /* Save size information to program. */
4402    program->stream_out_terminate_pds_data_size =
4403       ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4404    /* PDS program code size. */
4405    program->stream_out_terminate_pds_code_size = total_code_size;
4406 
4407    if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4408       return buffer + program->stream_out_terminate_pds_data_size;
4409    else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4410       return buffer;
4411 
4412    return NULL;
4413 }
4414 
4415 /* DrawArrays works in several steps:
4416  *
4417  * 1) load data from draw_indirect buffer
4418  * 2) tweak data to match hardware formats
4419  * 3) write data to indexblock
4420  * 4) signal the VDM to continue
4421  *
4422  * This is complicated by HW limitations on alignment, as well as a HWBRN.
4423  *
4424  * 1) Load data.
4425  * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4426  * spec we must deal with this by choosing an appropriate earlier address and
4427  * loading enough dwords that we load the entirety of the buffer.
4428  *
4429  * if addr & 0xf:
4430  *   load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 *   data = tmp[(addr & 0xf) >> 2]...
4432  * else
4433  *   load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4434  *   data = tmp[0]...
4435  *
4436  *
4437  * 2) Tweak data.
4438  * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4439  * the VDM control stream. We must subtract 1 from the loaded primCount.
4440  *
4441  * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4442  * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4443  * into another tmp that has the correct alignment. Note: this is only required
4444  * when data = tmp[even], as primCount is data+1:
4445  *
4446  * if data = tmp[even]:
4447  *   primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4448  * else:
4449  *   primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4450  *
4451  * This boils down to:
4452  *
4453  * primCount = data[1]
4454  * primCountSrc = data[1]
4455  * if brn_present && (data is even):
4456  *   mov scratch, primCount
4457  *   primCountSrc = scratch
4458  * endif
4459  * sub primCount, primCountSrc, 1
4460  *
4461  * 3) Store Data.
4462  * Write the now-tweaked data over the top of the indexblock.
4463  * To ensure the write completes before the VDM re-reads the data, we must cause
4464  * a data hazard by doing a dummy (dummy meaning we don't care about the
4465  * returned data) load from the same addresses. Again, because the ld must
4466  * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4467  * index block is 128-bit aligned. This is the client driver's responsibility.
4468  *
4469  * st data[0, 1, 2] -> (idxblock + 4)
4470  * load [idxblock] 4 dwords
4471  *
4472  * 4) Signal the VDM
4473  * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4474  * where it is currently fenced on a dummy idxblock that has been inserted by
4475  * the driver.
4476  */
4477 
4478 #include "pvr_draw_indirect_arrays0.h"
4479 #include "pvr_draw_indirect_arrays1.h"
4480 #include "pvr_draw_indirect_arrays2.h"
4481 #include "pvr_draw_indirect_arrays3.h"
4482 
4483 #include "pvr_draw_indirect_arrays_base_instance0.h"
4484 #include "pvr_draw_indirect_arrays_base_instance1.h"
4485 #include "pvr_draw_indirect_arrays_base_instance2.h"
4486 #include "pvr_draw_indirect_arrays_base_instance3.h"
4487 
4488 #include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4489 #include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4490 #include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4491 #include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4492 
/* SLC mode to use for a LD SRC0: BYPASS unless the device pointed to by
 * `device` reports the has_slc_mcu_cache_controls feature, in which case
 * CACHED is used.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)        \
   (!(device)->features.has_slc_mcu_cache_controls   \
       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS \
       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED)
4497 
pvr_pds_generate_draw_arrays_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4498 void pvr_pds_generate_draw_arrays_indirect(
4499    struct pvr_pds_drawindirect_program *restrict program,
4500    uint32_t *restrict buffer,
4501    enum pvr_pds_generate_mode gen_mode,
4502    const struct pvr_device_info *dev_info)
4503 {
4504    if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4505        (gen_mode == PDS_GENERATE_SIZES)) {
4506       const struct pvr_psc_program_output *psc_program = NULL;
4507       switch ((program->arg_buffer >> 2) % 4) {
4508       case 0:
4509          if (program->support_base_instance) {
4510             if (program->increment_draw_id) {
4511                psc_program =
4512                   &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4513             } else {
4514                psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4515             }
4516          } else {
4517             psc_program = &pvr_draw_indirect_arrays0_program;
4518          }
4519          break;
4520       case 1:
4521          if (program->support_base_instance) {
4522             if (program->increment_draw_id) {
4523                psc_program =
4524                   &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4525             } else {
4526                psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4527             }
4528          } else {
4529             psc_program = &pvr_draw_indirect_arrays1_program;
4530          }
4531          break;
4532       case 2:
4533          if (program->support_base_instance) {
4534             if (program->increment_draw_id) {
4535                psc_program =
4536                   &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4537             } else {
4538                psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4539             }
4540          } else {
4541             psc_program = &pvr_draw_indirect_arrays2_program;
4542          }
4543          break;
4544       case 3:
4545          if (program->support_base_instance) {
4546             if (program->increment_draw_id) {
4547                psc_program =
4548                   &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4549             } else {
4550                psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4551             }
4552          } else {
4553             psc_program = &pvr_draw_indirect_arrays3_program;
4554          }
4555          break;
4556       }
4557 
4558       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4559          memcpy(buffer,
4560                 psc_program->code,
4561                 psc_program->code_size * sizeof(uint32_t));
4562 #if defined(DUMP_PDS)
4563          for (uint32_t i = 0; i < psc_program->code_size; i++)
4564             PVR_PDS_PRINT_INST(buffer[i]);
4565 #endif
4566       }
4567 
4568       program->program = *psc_program;
4569    } else {
4570       switch ((program->arg_buffer >> 2) % 4) {
4571       case 0:
4572          if (program->support_base_instance) {
4573             if (program->increment_draw_id) {
4574                pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4575                   buffer,
4576                   program->arg_buffer & ~0xfull,
4577                   dev_info);
4578                pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4579                   buffer,
4580                   program->index_list_addr_buffer + 4);
4581                pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4582                   buffer,
4583                   program->index_list_addr_buffer);
4584                pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4585                   buffer,
4586                   program->num_views);
4587                pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4588                   buffer);
4589             } else {
4590                pvr_write_draw_indirect_arrays_base_instance0_di_data(
4591                   buffer,
4592                   program->arg_buffer & ~0xfull,
4593                   dev_info);
4594                pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4595                   buffer,
4596                   program->index_list_addr_buffer + 4);
4597                pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4598                   buffer,
4599                   program->index_list_addr_buffer);
4600                pvr_write_draw_indirect_arrays_base_instance0_num_views(
4601                   buffer,
4602                   program->num_views);
4603                pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4604             }
4605          } else {
4606             pvr_write_draw_indirect_arrays0_di_data(buffer,
4607                                                     program->arg_buffer &
4608                                                        ~0xfull,
4609                                                     dev_info);
4610             pvr_write_draw_indirect_arrays0_write_vdm(
4611                buffer,
4612                program->index_list_addr_buffer + 4);
4613             pvr_write_draw_indirect_arrays0_flush_vdm(
4614                buffer,
4615                program->index_list_addr_buffer);
4616             pvr_write_draw_indirect_arrays0_num_views(buffer,
4617                                                       program->num_views);
4618             pvr_write_draw_indirect_arrays0_immediates(buffer);
4619          }
4620          break;
4621       case 1:
4622          if (program->support_base_instance) {
4623             if (program->increment_draw_id) {
4624                pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4625                   buffer,
4626                   program->arg_buffer & ~0xfull,
4627                   dev_info);
4628                pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4629                   buffer,
4630                   program->index_list_addr_buffer + 4);
4631                pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4632                   buffer,
4633                   program->index_list_addr_buffer);
4634                pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4635                   buffer,
4636                   program->num_views);
4637                pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4638                   buffer);
4639             } else {
4640                pvr_write_draw_indirect_arrays_base_instance1_di_data(
4641                   buffer,
4642                   program->arg_buffer & ~0xfull,
4643                   dev_info);
4644                pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4645                   buffer,
4646                   program->index_list_addr_buffer + 4);
4647                pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4648                   buffer,
4649                   program->index_list_addr_buffer);
4650                pvr_write_draw_indirect_arrays_base_instance1_num_views(
4651                   buffer,
4652                   program->num_views);
4653                pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4654             }
4655          } else {
4656             pvr_write_draw_indirect_arrays1_di_data(buffer,
4657                                                     program->arg_buffer &
4658                                                        ~0xfull,
4659                                                     dev_info);
4660             pvr_write_draw_indirect_arrays1_write_vdm(
4661                buffer,
4662                program->index_list_addr_buffer + 4);
4663             pvr_write_draw_indirect_arrays1_flush_vdm(
4664                buffer,
4665                program->index_list_addr_buffer);
4666             pvr_write_draw_indirect_arrays1_num_views(buffer,
4667                                                       program->num_views);
4668             pvr_write_draw_indirect_arrays1_immediates(buffer);
4669          }
4670          break;
4671       case 2:
4672          if (program->support_base_instance) {
4673             if (program->increment_draw_id) {
4674                pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4675                   buffer,
4676                   program->arg_buffer & ~0xfull,
4677                   dev_info);
4678                pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4679                   buffer,
4680                   program->index_list_addr_buffer + 4);
4681                pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4682                   buffer,
4683                   program->index_list_addr_buffer);
4684                pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4685                   buffer,
4686                   program->num_views);
4687                pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4688                   buffer);
4689             } else {
4690                pvr_write_draw_indirect_arrays_base_instance2_di_data(
4691                   buffer,
4692                   program->arg_buffer & ~0xfull,
4693                   dev_info);
4694                pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4695                   buffer,
4696                   program->index_list_addr_buffer + 4);
4697                pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4698                   buffer,
4699                   program->index_list_addr_buffer);
4700                pvr_write_draw_indirect_arrays_base_instance2_num_views(
4701                   buffer,
4702                   program->num_views);
4703                pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4704             }
4705          } else {
4706             pvr_write_draw_indirect_arrays2_di_data(buffer,
4707                                                     program->arg_buffer &
4708                                                        ~0xfull,
4709                                                     dev_info);
4710             pvr_write_draw_indirect_arrays2_write_vdm(
4711                buffer,
4712                program->index_list_addr_buffer + 4);
4713             pvr_write_draw_indirect_arrays2_flush_vdm(
4714                buffer,
4715                program->index_list_addr_buffer);
4716             pvr_write_draw_indirect_arrays2_num_views(buffer,
4717                                                       program->num_views);
4718             pvr_write_draw_indirect_arrays2_immediates(buffer);
4719          }
4720          break;
4721       case 3:
4722          if (program->support_base_instance) {
4723             if (program->increment_draw_id) {
4724                pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4725                   buffer,
4726                   program->arg_buffer & ~0xfull,
4727                   dev_info);
4728                pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4729                   buffer,
4730                   program->index_list_addr_buffer + 4);
4731                pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4732                   buffer,
4733                   program->index_list_addr_buffer);
4734                pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4735                   buffer,
4736                   program->num_views);
4737                pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4738                   buffer);
4739             } else {
4740                pvr_write_draw_indirect_arrays_base_instance3_di_data(
4741                   buffer,
4742                   program->arg_buffer & ~0xfull,
4743                   dev_info);
4744                pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4745                   buffer,
4746                   program->index_list_addr_buffer + 4);
4747                pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4748                   buffer,
4749                   program->index_list_addr_buffer);
4750                pvr_write_draw_indirect_arrays_base_instance3_num_views(
4751                   buffer,
4752                   program->num_views);
4753                pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4754             }
4755          } else {
4756             pvr_write_draw_indirect_arrays3_di_data(buffer,
4757                                                     program->arg_buffer &
4758                                                        ~0xfull,
4759                                                     dev_info);
4760             pvr_write_draw_indirect_arrays3_write_vdm(
4761                buffer,
4762                program->index_list_addr_buffer + 4);
4763             pvr_write_draw_indirect_arrays3_flush_vdm(
4764                buffer,
4765                program->index_list_addr_buffer);
4766             pvr_write_draw_indirect_arrays3_num_views(buffer,
4767                                                       program->num_views);
4768             pvr_write_draw_indirect_arrays3_immediates(buffer);
4769          }
4770          break;
4771       }
4772    }
4773 }
4774 
4775 #include "pvr_draw_indirect_elements0.h"
4776 #include "pvr_draw_indirect_elements1.h"
4777 #include "pvr_draw_indirect_elements2.h"
4778 #include "pvr_draw_indirect_elements3.h"
4779 #include "pvr_draw_indirect_elements_base_instance0.h"
4780 #include "pvr_draw_indirect_elements_base_instance1.h"
4781 #include "pvr_draw_indirect_elements_base_instance2.h"
4782 #include "pvr_draw_indirect_elements_base_instance3.h"
4783 #include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4784 #include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4785 #include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4786 #include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4787 
pvr_pds_generate_draw_elements_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4788 void pvr_pds_generate_draw_elements_indirect(
4789    struct pvr_pds_drawindirect_program *restrict program,
4790    uint32_t *restrict buffer,
4791    enum pvr_pds_generate_mode gen_mode,
4792    const struct pvr_device_info *dev_info)
4793 {
4794    if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4795        (gen_mode == PDS_GENERATE_SIZES)) {
4796       const struct pvr_psc_program_output *psc_program = NULL;
4797       switch ((program->arg_buffer >> 2) % 4) {
4798       case 0:
4799          if (program->support_base_instance) {
4800             if (program->increment_draw_id) {
4801                psc_program =
4802                   &pvr_draw_indirect_elements_base_instance_drawid0_program;
4803             } else {
4804                psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4805             }
4806          } else {
4807             psc_program = &pvr_draw_indirect_elements0_program;
4808          }
4809          break;
4810       case 1:
4811          if (program->support_base_instance) {
4812             if (program->increment_draw_id) {
4813                psc_program =
4814                   &pvr_draw_indirect_elements_base_instance_drawid1_program;
4815             } else {
4816                psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4817             }
4818          } else {
4819             psc_program = &pvr_draw_indirect_elements1_program;
4820          }
4821          break;
4822       case 2:
4823          if (program->support_base_instance) {
4824             if (program->increment_draw_id) {
4825                psc_program =
4826                   &pvr_draw_indirect_elements_base_instance_drawid2_program;
4827             } else {
4828                psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4829             }
4830          } else {
4831             psc_program = &pvr_draw_indirect_elements2_program;
4832          }
4833          break;
4834       case 3:
4835          if (program->support_base_instance) {
4836             if (program->increment_draw_id) {
4837                psc_program =
4838                   &pvr_draw_indirect_elements_base_instance_drawid3_program;
4839             } else {
4840                psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4841             }
4842          } else {
4843             psc_program = &pvr_draw_indirect_elements3_program;
4844          }
4845          break;
4846       }
4847 
4848       if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4849          memcpy(buffer,
4850                 psc_program->code,
4851                 psc_program->code_size * sizeof(uint32_t));
4852 
4853 #if defined(DUMP_PDS)
4854          for (uint32_t i = 0; i < psc_program->code_size; i++)
4855             PVR_PDS_PRINT_INST(buffer[i]);
4856 #endif
4857       }
4858 
4859       program->program = *psc_program;
4860    } else {
4861       switch ((program->arg_buffer >> 2) % 4) {
4862       case 0:
4863          if (program->support_base_instance) {
4864             if (program->increment_draw_id) {
4865                pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4866                   buffer,
4867                   program->arg_buffer & ~0xfull,
4868                   dev_info);
4869                pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4870                   buffer,
4871                   program->index_list_addr_buffer);
4872                pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4873                   buffer,
4874                   program->index_list_addr_buffer);
4875                pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4876                   buffer,
4877                   program->num_views);
4878                pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4879                   buffer,
4880                   program->index_stride);
4881                pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4882                   buffer,
4883                   program->index_buffer);
4884                pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4885                   buffer,
4886                   program->index_block_header);
4887                pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4888                   buffer);
4889             } else {
4890                pvr_write_draw_indirect_elements_base_instance0_di_data(
4891                   buffer,
4892                   program->arg_buffer & ~0xfull,
4893                   dev_info);
4894                pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4895                   buffer,
4896                   program->index_list_addr_buffer);
4897                pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4898                   buffer,
4899                   program->index_list_addr_buffer);
4900                pvr_write_draw_indirect_elements_base_instance0_num_views(
4901                   buffer,
4902                   program->num_views);
4903                pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4904                   buffer,
4905                   program->index_stride);
4906                pvr_write_draw_indirect_elements_base_instance0_idx_base(
4907                   buffer,
4908                   program->index_buffer);
4909                pvr_write_draw_indirect_elements_base_instance0_idx_header(
4910                   buffer,
4911                   program->index_block_header);
4912                pvr_write_draw_indirect_elements_base_instance0_immediates(
4913                   buffer);
4914             }
4915          } else {
4916             pvr_write_draw_indirect_elements0_di_data(buffer,
4917                                                       program->arg_buffer &
4918                                                          ~0xfull,
4919                                                       dev_info);
4920             pvr_write_draw_indirect_elements0_write_vdm(
4921                buffer,
4922                program->index_list_addr_buffer);
4923             pvr_write_draw_indirect_elements0_flush_vdm(
4924                buffer,
4925                program->index_list_addr_buffer);
4926             pvr_write_draw_indirect_elements0_num_views(buffer,
4927                                                         program->num_views);
4928             pvr_write_draw_indirect_elements0_idx_stride(buffer,
4929                                                          program->index_stride);
4930             pvr_write_draw_indirect_elements0_idx_base(buffer,
4931                                                        program->index_buffer);
4932             pvr_write_draw_indirect_elements0_idx_header(
4933                buffer,
4934                program->index_block_header);
4935             pvr_write_draw_indirect_elements0_immediates(buffer);
4936          }
4937          break;
4938       case 1:
4939          if (program->support_base_instance) {
4940             if (program->increment_draw_id) {
4941                pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4942                   buffer,
4943                   program->arg_buffer & ~0xfull,
4944                   dev_info);
4945                pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4946                   buffer,
4947                   program->index_list_addr_buffer);
4948                pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4949                   buffer,
4950                   program->index_list_addr_buffer);
4951                pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4952                   buffer,
4953                   program->num_views);
4954                pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4955                   buffer,
4956                   program->index_stride);
4957                pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4958                   buffer,
4959                   program->index_buffer);
4960                pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4961                   buffer,
4962                   program->index_block_header);
4963                pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4964                   buffer);
4965             } else {
4966                pvr_write_draw_indirect_elements_base_instance1_di_data(
4967                   buffer,
4968                   program->arg_buffer & ~0xfull,
4969                   dev_info);
4970                pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4971                   buffer,
4972                   program->index_list_addr_buffer);
4973                pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4974                   buffer,
4975                   program->index_list_addr_buffer);
4976                pvr_write_draw_indirect_elements_base_instance1_num_views(
4977                   buffer,
4978                   program->num_views);
4979                pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4980                   buffer,
4981                   program->index_stride);
4982                pvr_write_draw_indirect_elements_base_instance1_idx_base(
4983                   buffer,
4984                   program->index_buffer);
4985                pvr_write_draw_indirect_elements_base_instance1_idx_header(
4986                   buffer,
4987                   program->index_block_header);
4988                pvr_write_draw_indirect_elements_base_instance1_immediates(
4989                   buffer);
4990             }
4991          } else {
4992             pvr_write_draw_indirect_elements1_di_data(buffer,
4993                                                       program->arg_buffer &
4994                                                          ~0xfull,
4995                                                       dev_info);
4996             pvr_write_draw_indirect_elements1_write_vdm(
4997                buffer,
4998                program->index_list_addr_buffer);
4999             pvr_write_draw_indirect_elements1_flush_vdm(
5000                buffer,
5001                program->index_list_addr_buffer);
5002             pvr_write_draw_indirect_elements1_num_views(buffer,
5003                                                         program->num_views);
5004             pvr_write_draw_indirect_elements1_idx_stride(buffer,
5005                                                          program->index_stride);
5006             pvr_write_draw_indirect_elements1_idx_base(buffer,
5007                                                        program->index_buffer);
5008             pvr_write_draw_indirect_elements1_idx_header(
5009                buffer,
5010                program->index_block_header);
5011             pvr_write_draw_indirect_elements1_immediates(buffer);
5012          }
5013          break;
5014       case 2:
5015          if (program->support_base_instance) {
5016             if (program->increment_draw_id) {
5017                pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
5018                   buffer,
5019                   program->arg_buffer & ~0xfull,
5020                   dev_info);
5021                pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
5022                   buffer,
5023                   program->index_list_addr_buffer);
5024                pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5025                   buffer,
5026                   program->index_list_addr_buffer);
5027                pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5028                   buffer,
5029                   program->num_views);
5030                pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5031                   buffer,
5032                   program->index_stride);
5033                pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5034                   buffer,
5035                   program->index_buffer);
5036                pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5037                   buffer,
5038                   program->index_block_header);
5039                pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5040                   buffer);
5041             } else {
5042                pvr_write_draw_indirect_elements_base_instance2_di_data(
5043                   buffer,
5044                   program->arg_buffer & ~0xfull,
5045                   dev_info);
5046                pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5047                   buffer,
5048                   program->index_list_addr_buffer);
5049                pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5050                   buffer,
5051                   program->index_list_addr_buffer);
5052                pvr_write_draw_indirect_elements_base_instance2_num_views(
5053                   buffer,
5054                   program->num_views);
5055                pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5056                   buffer,
5057                   program->index_stride);
5058                pvr_write_draw_indirect_elements_base_instance2_idx_base(
5059                   buffer,
5060                   program->index_buffer);
5061                pvr_write_draw_indirect_elements_base_instance2_idx_header(
5062                   buffer,
5063                   program->index_block_header);
5064                pvr_write_draw_indirect_elements_base_instance2_immediates(
5065                   buffer);
5066             }
5067          } else {
5068             pvr_write_draw_indirect_elements2_di_data(buffer,
5069                                                       program->arg_buffer &
5070                                                          ~0xfull,
5071                                                       dev_info);
5072             pvr_write_draw_indirect_elements2_write_vdm(
5073                buffer,
5074                program->index_list_addr_buffer);
5075             pvr_write_draw_indirect_elements2_flush_vdm(
5076                buffer,
5077                program->index_list_addr_buffer);
5078             pvr_write_draw_indirect_elements2_num_views(buffer,
5079                                                         program->num_views);
5080             pvr_write_draw_indirect_elements2_idx_stride(buffer,
5081                                                          program->index_stride);
5082             pvr_write_draw_indirect_elements2_idx_base(buffer,
5083                                                        program->index_buffer);
5084             pvr_write_draw_indirect_elements2_idx_header(
5085                buffer,
5086                program->index_block_header);
5087             pvr_write_draw_indirect_elements2_immediates(buffer);
5088          }
5089          break;
5090       case 3:
5091          if (program->support_base_instance) {
5092             if (program->increment_draw_id) {
5093                pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5094                   buffer,
5095                   program->arg_buffer & ~0xfull,
5096                   dev_info);
5097                pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5098                   buffer,
5099                   program->index_list_addr_buffer);
5100                pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5101                   buffer,
5102                   program->index_list_addr_buffer);
5103                pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5104                   buffer,
5105                   program->num_views);
5106                pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5107                   buffer,
5108                   program->index_stride);
5109                pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5110                   buffer,
5111                   program->index_buffer);
5112                pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5113                   buffer,
5114                   program->index_block_header);
5115                pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5116                   buffer);
5117             } else {
5118                pvr_write_draw_indirect_elements_base_instance3_di_data(
5119                   buffer,
5120                   program->arg_buffer & ~0xfull,
5121                   dev_info);
5122                pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5123                   buffer,
5124                   program->index_list_addr_buffer);
5125                pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5126                   buffer,
5127                   program->index_list_addr_buffer);
5128                pvr_write_draw_indirect_elements_base_instance3_num_views(
5129                   buffer,
5130                   program->num_views);
5131                pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5132                   buffer,
5133                   program->index_stride);
5134                pvr_write_draw_indirect_elements_base_instance3_idx_base(
5135                   buffer,
5136                   program->index_buffer);
5137                pvr_write_draw_indirect_elements_base_instance3_idx_header(
5138                   buffer,
5139                   program->index_block_header);
5140                pvr_write_draw_indirect_elements_base_instance3_immediates(
5141                   buffer);
5142             }
5143          } else {
5144             pvr_write_draw_indirect_elements3_di_data(buffer,
5145                                                       program->arg_buffer &
5146                                                          ~0xfull,
5147                                                       dev_info);
5148             pvr_write_draw_indirect_elements3_write_vdm(
5149                buffer,
5150                program->index_list_addr_buffer);
5151             pvr_write_draw_indirect_elements3_flush_vdm(
5152                buffer,
5153                program->index_list_addr_buffer);
5154             pvr_write_draw_indirect_elements3_num_views(buffer,
5155                                                         program->num_views);
5156             pvr_write_draw_indirect_elements3_idx_stride(buffer,
5157                                                          program->index_stride);
5158             pvr_write_draw_indirect_elements3_idx_base(buffer,
5159                                                        program->index_buffer);
5160             pvr_write_draw_indirect_elements3_idx_header(
5161                buffer,
5162                program->index_block_header);
5163             pvr_write_draw_indirect_elements3_immediates(buffer);
5164          }
5165          break;
5166       }
5167    }
5168 }
5169