1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "util/log.h"
36 #include "util/macros.h"
37
/* Extract the upper (H32) or lower (L32) 32 bits of a 64-bit value.
 *
 * The whole expansion is parenthesized so the macros behave as a single
 * primary expression wherever they are used (e.g. as the operand of sizeof or
 * next to postfix operators). X must be at least 64 bits wide for H32, since
 * it is shifted right by 32.
 */
#define H32(X) ((uint32_t)(((X) >> 32U) & 0xFFFFFFFFUL))
#define L32(X) ((uint32_t)((X) & 0xFFFFFFFFUL))
40
41 /*****************************************************************************
42 Macro definitions
43 *****************************************************************************/
44
/* NOTE(review): presumably log2 of the dword size in bytes - confirm against
 * the users of this macro.
 */
#define PVR_PDS_DWORD_SHIFT 2

/* Index ranges handed out by pvr_pds_get_constants() and pvr_pds_get_temps():
 * constants are allocated from [BASE, BASE + SIZE) of the constants block and
 * temps are bounded by TEMPS_BLOCK_BASE + TEMPS_BLOCK_SIZE.
 */
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
#define PVR_PDS_TEMPS_BLOCK_BASE     128
#define PVR_PDS_TEMPS_BLOCK_SIZE     32

/* Largest ST COUNT4 / LD COUNT8 values: aliases of the matching field masks. */
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK

/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
/* Local IDs are available in every task. */
#define PVR_PDS_CDM_LOCAL_ID_X  0
#define PVR_PDS_CDM_LOCAL_ID_YZ 1

/* DOUTW dword-mask selectors. The first four index dword_mask_const[];
 * PVR_PDS_DOUTW_MAXMASK is the number of selectors (the table size).
 */
#define PVR_PDS_DOUTW_LOWER32  0
#define PVR_PDS_DOUTW_UPPER32  1
#define PVR_PDS_DOUTW_LOWER64  2
#define PVR_PDS_DOUTW_LOWER128 3
#define PVR_PDS_DOUTW_MAXMASK  4

#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE  16U
73
74 /*****************************************************************************
75 Static variables
76 *****************************************************************************/
77
78 static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
79 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
80 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
81 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
82 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
83 };
84
85 /* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
86 * cache_control_const[1].
87 */
88 static const uint32_t cache_control_const[2][2] = {
89 { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
90 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
91 { 0, 0 }
92 };
93
94 /*****************************************************************************
95 Function definitions
96 *****************************************************************************/
97
pvr_pds_encode_ld_src0(uint64_t dest,uint64_t count8,uint64_t src_add,bool cached,const struct pvr_device_info * dev_info)98 uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99 uint64_t count8,
100 uint64_t src_add,
101 bool cached,
102 const struct pvr_device_info *dev_info)
103 {
104 uint64_t encoded = 0;
105
106 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108 : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109 }
110
111 encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113 encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116 : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117 encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119
120 return encoded;
121 }
122
pvr_pds_encode_st_src0(uint64_t src,uint64_t count4,uint64_t dst_add,bool write_through,const struct pvr_device_info * device_info)123 uint64_t pvr_pds_encode_st_src0(uint64_t src,
124 uint64_t count4,
125 uint64_t dst_add,
126 bool write_through,
127 const struct pvr_device_info *device_info)
128 {
129 uint64_t encoded = 0;
130
131 if (device_info->features.has_slc_mcu_cache_controls) {
132 encoded |= (write_through
133 ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134 : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135 }
136
137 encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139 encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141 encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142 : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143 encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145
146 return encoded;
147 }
148
149 static ALWAYS_INLINE uint32_t
pvr_pds_encode_doutw_src1(uint32_t dest,uint32_t dword_mask,uint32_t flags,bool cached,const struct pvr_device_info * dev_info)150 pvr_pds_encode_doutw_src1(uint32_t dest,
151 uint32_t dword_mask,
152 uint32_t flags,
153 bool cached,
154 const struct pvr_device_info *dev_info)
155 {
156 assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157 ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158 (dword_mask < PVR_PDS_DOUTW_LOWER64));
159
160 uint32_t encoded =
161 (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162
163 encoded |= dword_mask_const[dword_mask];
164
165 encoded |= flags;
166
167 encoded |=
168 cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169 : 1]
170 [cached ? 1 : 0];
171 return encoded;
172 }
173
pvr_pds_encode_doutw64(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)174 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175 uint32_t end,
176 uint32_t src1,
177 uint32_t src0)
178 {
179 return pvr_pds_inst_encode_dout(cc,
180 end,
181 src1,
182 src0,
183 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184 }
185
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)186 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187 uint32_t end,
188 uint32_t src0)
189 {
190 return pvr_pds_inst_encode_dout(cc,
191 end,
192 0,
193 src0,
194 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195 }
196
pvr_pds_inst_encode_doutc(uint32_t cc,uint32_t end)197 static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198 uint32_t end)
199 {
200 return pvr_pds_inst_encode_dout(cc,
201 end,
202 0,
203 0,
204 PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205 }
206
pvr_pds_encode_doutd(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)207 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208 uint32_t end,
209 uint32_t src1,
210 uint32_t src0)
211 {
212 return pvr_pds_inst_encode_dout(cc,
213 end,
214 src1,
215 src0,
216 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218
pvr_pds_encode_douti(uint32_t cc,uint32_t end,uint32_t src0)219 static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220 uint32_t end,
221 uint32_t src0)
222 {
223 return pvr_pds_inst_encode_dout(cc,
224 end,
225 0,
226 src0,
227 PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228 }
229
pvr_pds_encode_bra(uint32_t srcc,uint32_t neg,uint32_t setc,int32_t relative_address)230 static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231 uint32_t neg,
232 uint32_t setc,
233 int32_t relative_address)
234 {
235 /* Address should be signed but API only allows unsigned value. */
236 return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237 }
238
239 /**
240 * Gets the next constant address and moves the next constant pointer along.
241 *
242 * \param next_constant Pointer to the next constant address.
243 * \param num_constants The number of constants required.
244 * \param count The number of constants allocated.
245 * \return The address of the next constant.
246 */
pvr_pds_get_constants(uint32_t * next_constant,uint32_t num_constants,uint32_t * count)247 static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248 uint32_t num_constants,
249 uint32_t *count)
250 {
251 uint32_t constant;
252
253 /* Work out starting constant number. For even number of constants, start on
254 * a 64-bit boundary.
255 */
256 if (num_constants & 1)
257 constant = *next_constant;
258 else
259 constant = (*next_constant + 1) & ~1;
260
261 /* Update the count with the number of constants actually allocated. */
262 *count += constant + num_constants - *next_constant;
263
264 /* Move the next constant pointer. */
265 *next_constant = constant + num_constants;
266
267 assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268
269 return constant;
270 }
271
272 /**
273 * Gets the next temp address and moves the next temp pointer along.
274 *
275 * \param next_temp Pointer to the next temp address.
276 * \param num_temps The number of temps required.
277 * \param count The number of temps allocated.
278 * \return The address of the next temp.
279 */
280 static uint32_t
pvr_pds_get_temps(uint32_t * next_temp,uint32_t num_temps,uint32_t * count)281 pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282 {
283 uint32_t temp;
284
285 /* Work out starting temp number. For even number of temps, start on a
286 * 64-bit boundary.
287 */
288 if (num_temps & 1)
289 temp = *next_temp;
290 else
291 temp = (*next_temp + 1) & ~1;
292
293 /* Update the count with the number of temps actually allocated. */
294 *count += temp + num_temps - *next_temp;
295
296 /* Move the next temp pointer. */
297 *next_temp = temp + num_temps;
298
299 assert((temp + num_temps) <=
300 (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301
302 return temp;
303 }
304
305 /**
306 * Write a 32-bit constant indexed by the long range.
307 *
308 * \param data_block Pointer to data block to write to.
309 * \param index Index within the data to write to.
310 * \param dword The 32-bit constant to write.
311 */
312 static void
pvr_pds_write_constant32(uint32_t * data_block,uint32_t index,uint32_t dword0)313 pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314 {
315 /* Check range. */
316 assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317 PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318
319 data_block[index + 0] = dword0;
320
321 PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322 }
323
324 /**
325 * Write a 64-bit constant indexed by the long range.
326 *
327 * \param data_block Pointer to data block to write to.
328 * \param index Index within the data to write to.
329 * \param dword0 Lower half of the 64 bit constant.
330 * \param dword1 Upper half of the 64 bit constant.
331 */
pvr_pds_write_constant64(uint32_t * data_block,uint32_t index,uint32_t dword0,uint32_t dword1)332 static void pvr_pds_write_constant64(uint32_t *data_block,
333 uint32_t index,
334 uint32_t dword0,
335 uint32_t dword1)
336 {
337 /* Has to be on 64 bit boundary. */
338 assert((index & 1) == 0);
339
340 /* Check range. */
341 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343
344 data_block[index + 0] = dword0;
345 data_block[index + 1] = dword1;
346
347 PVR_PDS_PRINT_DATA("WriteConstant64",
348 ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349 index);
350 }
351
352 /**
353 * Write a 64-bit constant from a single wide word indexed by the long-range
354 * number.
355 *
356 * \param data_block Pointer to data block to write to.
357 * \param index Index within the data to write to.
358 * \param word The 64-bit constant to write.
359 */
360
361 static void
pvr_pds_write_wide_constant(uint32_t * data_block,uint32_t index,uint64_t word)362 pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363 {
364 /* Has to be on 64 bit boundary. */
365 assert((index & 1) == 0);
366
367 /* Check range. */
368 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370
371 data_block[index + 0] = L32(word);
372 data_block[index + 1] = H32(word);
373
374 PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375 }
376
pvr_pds_write_dma_address(uint32_t * data_block,uint32_t index,uint64_t address,bool coherent,const struct pvr_device_info * dev_info)377 static void pvr_pds_write_dma_address(uint32_t *data_block,
378 uint32_t index,
379 uint64_t address,
380 bool coherent,
381 const struct pvr_device_info *dev_info)
382 {
383 /* Has to be on 64 bit boundary. */
384 assert((index & 1) == 0);
385
386 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387 address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388
389 /* Check range. */
390 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392
393 data_block[index + 0] = L32(address);
394 data_block[index + 1] = H32(address);
395
396 PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397 }
398
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * The constant is allocated starting from the current data size (rounded up
 * to an even index by pvr_pds_get_constants()) and written there.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size In/out number of constants allocated; incremented by the
 *                  number of slots consumed.
 * \returns The index at which the constant was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* The allocation cursor is a private copy of the current size; only
    * *data_size itself is updated for the caller.
    */
   uint32_t cursor = *data_size;
   const uint32_t slot = pvr_pds_get_constants(&cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421
pvr_pds_pixel_shader_sa_initialize(struct pvr_pds_pixel_shader_sa_program * program)422 void pvr_pds_pixel_shader_sa_initialize(
423 struct pvr_pds_pixel_shader_sa_program *program)
424 {
425 memset(program, 0, sizeof(*program));
426 }
427
428 /**
429 * Encode a DMA burst.
430 *
431 * \param dma_control DMA control words.
432 * \param dma_address DMA address.
433 * \param dest_offset Destination offset in the attribute.
434 * \param dma_size The size of the DMA in words.
435 * \param src_address Source address for the burst.
436 * \param last Last DMA in program.
437 * \param dev_info PVR device info structure.
438 * \returns The number of DMA transfers required.
439 */
pvr_pds_encode_dma_burst(uint32_t * dma_control,uint64_t * dma_address,uint32_t dest_offset,uint32_t dma_size,uint64_t src_address,bool last,const struct pvr_device_info * dev_info)440 uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441 uint64_t *dma_address,
442 uint32_t dest_offset,
443 uint32_t dma_size,
444 uint64_t src_address,
445 bool last,
446 const struct pvr_device_info *dev_info)
447 {
448 dma_control[0] = dma_size
449 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
450 dma_control[0] |= dest_offset
451 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
452
453 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
454 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
455
456 if (last)
457 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
458
459 dma_address[0] = src_address;
460 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
461 dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
462
463 /* Force to 1 DMA. */
464 return 1;
465 }
466
467 /* FIXME: use the csbgen interface and pvr_csb_pack.
468 * FIXME: use bool for phase_rate_change.
469 */
470 /**
471 * Sets up the USC control words for a DOUTU.
472 *
473 * \param usc_task_control USC task control structure to be setup.
474 * \param execution_address USC execution virtual address.
475 * \param usc_temps Number of USC temps.
476 * \param sample_rate Sample rate for the DOUTU.
477 * \param phase_rate_change Phase rate change for the DOUTU.
478 */
pvr_pds_setup_doutu(struct pvr_pds_usc_task_control * usc_task_control,uint64_t execution_address,uint32_t usc_temps,uint32_t sample_rate,bool phase_rate_change)479 void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
480 uint64_t execution_address,
481 uint32_t usc_temps,
482 uint32_t sample_rate,
483 bool phase_rate_change)
484 {
485 usc_task_control->src0 = UINT64_C(0);
486
487 /* Set the execution address. */
488 pvr_set_usc_execution_address64(&(usc_task_control->src0),
489 execution_address);
490
491 if (usc_temps > 0) {
492 /* Temps are allocated in blocks of 4 dwords. */
493 usc_temps =
494 DIV_ROUND_UP(usc_temps,
495 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
496
497 /* Check for losing temps due to too many requested. */
498 assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
499 usc_temps);
500
501 usc_task_control->src0 |=
502 ((uint64_t)(usc_temps &
503 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
504 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
505 }
506
507 if (sample_rate > 0) {
508 usc_task_control->src0 |=
509 ((uint64_t)sample_rate)
510 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
511 }
512
513 if (phase_rate_change) {
514 usc_task_control->src0 |=
515 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
516 }
517 }
518
519 /**
520 * Generates the PDS pixel event program.
521 *
522 * \param program Pointer to the PDS pixel event program.
523 * \param buffer Pointer to the buffer for the program.
524 * \param gen_mode Generate either a data segment or code segment.
525 * \param dev_info PVR device info structure.
526 * \returns Pointer to just beyond the buffer for the program.
527 */
528 uint32_t *
pvr_pds_generate_pixel_event(struct pvr_pds_event_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)529 pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
530 uint32_t *restrict buffer,
531 enum pvr_pds_generate_mode gen_mode,
532 const struct pvr_device_info *dev_info)
533 {
534 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
535 uint32_t *constants = buffer;
536
537 uint32_t data_size = 0;
538
539 /* Copy the DMA control words and USC task control words to constants, then
540 * arrange them so that the 64-bit words are together followed by the 32-bit
541 * words.
542 */
543 uint32_t control_constant =
544 pvr_pds_get_constants(&next_constant, 2, &data_size);
545 uint32_t emit_constant =
546 pvr_pds_get_constants(&next_constant,
547 (2 * program->num_emit_word_pairs),
548 &data_size);
549
550 uint32_t control_word_constant =
551 pvr_pds_get_constants(&next_constant,
552 program->num_emit_word_pairs,
553 &data_size);
554
555 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
556 /* Src0 for DOUTU. */
557 pvr_pds_write_wide_constant(buffer,
558 control_constant,
559 program->task_control.src0); /* DOUTU */
560 /* 64-bit Src0. */
561
562 /* Emit words for end of tile program. */
563 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
564 pvr_pds_write_constant64(constants,
565 emit_constant + (2 * i),
566 program->emit_words[(2 * i) + 0],
567 program->emit_words[(2 * i) + 1]);
568 }
569
570 /* Control words. */
571 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
572 uint32_t doutw = pvr_pds_encode_doutw_src1(
573 (2 * i),
574 PVR_PDS_DOUTW_LOWER64,
575 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
576 false,
577 dev_info);
578
579 if (i == (program->num_emit_word_pairs - 1))
580 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
581
582 pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
583 }
584 }
585
586 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
587 /* DOUTW the state into the shared register. */
588 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
589 *buffer++ = pvr_pds_encode_doutw64(
590 /* cc */ 0,
591 /* END */ 0,
592 /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
593 /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
594 */
595 }
596
597 /* Kick the USC. */
598 *buffer++ = pvr_pds_encode_doutu(
599 /* cc */ 0,
600 /* END */ 1,
601 /* SRC0 */ control_constant >> 1);
602 }
603
604 uint32_t code_size = 1 + program->num_emit_word_pairs;
605
606 /* Save the data segment Pointer and size. */
607 program->data_segment = constants;
608 program->data_size = data_size;
609 program->code_size = code_size;
610
611 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
612 return (constants + next_constant);
613
614 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
615 return buffer;
616
617 return NULL;
618 }
619
620 /**
621 * Checks if any of the vertex streams contains instance data.
622 *
623 * \param streams Streams contained in the vertex shader.
624 * \param num_streams Number of vertex streams.
625 * \returns true if one or more of the given vertex streams contains
626 * instance data, otherwise false.
627 */
pvr_pds_vertex_streams_contains_instance_data(const struct pvr_pds_vertex_stream * streams,uint32_t num_streams)628 static bool pvr_pds_vertex_streams_contains_instance_data(
629 const struct pvr_pds_vertex_stream *streams,
630 uint32_t num_streams)
631 {
632 for (uint32_t i = 0; i < num_streams; i++) {
633 const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
634 if (vertex_stream->instance_data)
635 return true;
636 }
637
638 return false;
639 }
640
/**
 * Allocates one or two constants for the PDS vertex shader, whose constants
 * are divided into banks of 8 dwords.
 *
 * Once the cursor is at or beyond num_backs * 8 the request is forwarded to
 * pvr_pds_get_constants(). Below that limit the cursor is moved as follows:
 *  - at a bank start: a single constant advances the cursor by 1, a pair
 *    advances it by a whole bank (8);
 *  - elsewhere: a single constant is taken at the cursor, which then advances
 *    by 7; a pair first advances the cursor by 7 and is taken there.
 *
 * \param num_backs Number of banks (callers in this file pass the number of
 *                  vertex streams).
 * \param next_constant In/out allocation cursor.
 * \param num_constants The number of constants required; must be 1 or 2.
 * \param count Running allocation count. Within the banked region it is only
 *              updated when a pair lands at or beyond the banked limit.
 * \returns The index of the first allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   const uint32_t banked_limit = num_backs << 3;
   uint32_t allocated;

   assert(num_constants == 1 || num_constants == 2);

   /* Past the banked region: fall back to the linear allocator. */
   if (*next_constant >= banked_limit)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   const bool at_bank_start = (*next_constant % 8) == 0;

   if (num_constants == 1) {
      allocated = *next_constant;
      *next_constant += at_bank_start ? 1 : 7;
      return allocated;
   }

   if (at_bank_start) {
      allocated = *next_constant;
      *next_constant += 8;
      return allocated;
   }

   /* Pair requested in the middle of a bank: move the cursor on by 7 before
    * taking the pair.
    */
   *next_constant += 7;
   allocated = *next_constant;

   if (*next_constant >= banked_limit) {
      *next_constant += 2;
      *count += 2;
   } else {
      *next_constant += 8;
   }

   return allocated;
}
679
680 /**
681 * Generates a PDS program to load USC vertex inputs based from one or more
682 * vertex buffers, each containing potentially multiple elements, and then a
683 * DOUTU to execute the USC.
684 *
685 * \param program Pointer to the description of the program which should be
686 * generated.
687 * \param buffer Pointer to buffer that receives the output of this function.
688 * Will either be the data segment or code segment depending on
689 * gen_mode.
690 * \param gen_mode Which part to generate, either data segment or
691 * code segment. If PDS_GENERATE_SIZES is specified, nothing is
692 * written, but size information in program is updated.
693 * \param dev_info PVR device info structure.
694 * \returns Pointer to just beyond the buffer for the data - i.e the value
695 * of the buffer after writing its contents.
696 */
697 /* FIXME: Implement PDS_GENERATE_CODEDATA_SEGMENTS? */
698 uint32_t *
pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)699 pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700 uint32_t *restrict buffer,
701 enum pvr_pds_generate_mode gen_mode,
702 const struct pvr_device_info *dev_info)
703 {
704 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705 uint32_t next_stream_constant;
706 uint32_t next_temp;
707 uint32_t usc_control_constant64;
708 uint32_t stride_constant32 = 0;
709 uint32_t dma_address_constant64 = 0;
710 uint32_t dma_control_constant64;
711 uint32_t multiplier_constant32 = 0;
712 uint32_t base_instance_const32 = 0;
713
714 uint32_t temp = 0;
715 uint32_t index_temp64 = 0;
716 uint32_t num_vertices_temp64 = 0;
717 uint32_t pre_index_temp = (uint32_t)(-1);
718 bool first_ddmadt = true;
719 uint32_t input_register0;
720 uint32_t input_register1;
721 uint32_t input_register2;
722
723 struct pvr_pds_vertex_stream *vertex_stream;
724 struct pvr_pds_vertex_element *vertex_element;
725 uint32_t shift_2s_comp;
726
727 uint32_t data_size = 0;
728 uint32_t code_size = 0;
729 uint32_t temps_used = 0;
730
731 bool direct_writes_needed = false;
732
733 uint32_t consts_size = 0;
734 uint32_t vertex_id_control_word_const32 = 0;
735 uint32_t instance_id_control_word_const32 = 0;
736 uint32_t instance_id_modifier_word_const32 = 0;
737 uint32_t geometry_id_control_word_const64 = 0;
738 uint32_t empty_dma_control_constant64 = 0;
739
740 bool any_instanced_stream =
741 pvr_pds_vertex_streams_contains_instance_data(program->streams,
742 program->num_streams);
743
744 uint32_t base_instance_register = 0;
745 uint32_t ddmadt_enables = 0;
746
747 bool issue_empty_ddmad = false;
748 uint32_t last_stream_index = program->num_streams - 1;
749 bool current_p0 = false;
750 uint32_t skip_stream_flag = 0;
751
752 /* Generate the PDS vertex shader data. */
753
754 #if MESA_DEBUG
755 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756 for (uint32_t i = 0; i < program->data_size; i++)
757 buffer[i] = 0xDEADBEEF;
758 }
759 #endif
760
761 /* Generate the PDS vertex shader program */
762 next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763 /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764 input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765 /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766 input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767
768 if (program->iterate_remap_id)
769 input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770 else
771 input_register2 = 0; /* Not used, but need to silence the compiler. */
772
773 /* Generate the PDS vertex shader code. The constants in the data block are
774 * arranged as follows:
775 *
776 * 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank
777 * 3 Not used (tmps) Stride | Multiplier Address Control
778 */
779
780 /* Find out how many constants are needed by streams. */
781 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782 pvr_pds_get_constants(&next_constant,
783 8 * program->streams[stream].num_elements,
784 &consts_size);
785 }
786
787 /* If there are no vertex streams allocate the first bank for USC Code
788 * Address.
789 */
790 if (consts_size == 0)
791 pvr_pds_get_constants(&next_constant, 2, &consts_size);
792 else
793 next_constant = 8;
794
795 direct_writes_needed = program->iterate_instance_id ||
796 program->iterate_vtx_id || program->iterate_remap_id;
797
798 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799 /* Evaluate what config of DDMAD should be used for each stream. */
800 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801 vertex_stream = &program->streams[stream];
802
803 if (vertex_stream->use_ddmadt) {
804 ddmadt_enables |= (1 << stream);
805
806 /* The condition for index value is:
807 * index * stride + size <= bufferSize (all in unit of byte)
808 */
809 if (vertex_stream->stride == 0) {
810 if (vertex_stream->elements[0].size <=
811 vertex_stream->buffer_size_in_bytes) {
812 /* index can be any value -> no need to use DDMADT. */
813 ddmadt_enables &= (~(1 << stream));
814 } else {
815 /* No index works -> no need to issue DDMAD instruction.
816 */
817 skip_stream_flag |= (1 << stream);
818 }
819 } else {
820 /* index * stride + size <= bufferSize
821 *
822 * can be converted to:
823 * index <= (bufferSize - size) / stride
824 *
825 * where maximum index is:
826 * integer((bufferSize - size) / stride).
827 */
828 if (vertex_stream->buffer_size_in_bytes <
829 vertex_stream->elements[0].size) {
830 /* No index works -> no need to issue DDMAD instruction.
831 */
832 skip_stream_flag |= (1 << stream);
833 } else {
834 uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835 vertex_stream->elements[0].size) /
836 vertex_stream->stride;
837 if (max_index == 0xFFFFFFFFu) {
838 /* No need to use DDMADT as all possible indices can
839 * pass the test.
840 */
841 ddmadt_enables &= (~(1 << stream));
842 } else {
843 /* In this case, test condition can be changed to
844 * index < max_index + 1.
845 */
846 program->streams[stream].num_vertices =
847 pvr_pds_get_bank_based_constants(program->num_streams,
848 &next_constant,
849 1,
850 &consts_size);
851
852 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853 pvr_pds_write_constant32(
854 buffer,
855 program->streams[stream].num_vertices,
856 max_index + 1);
857 }
858 }
859 }
860 }
861 }
862
863 if ((skip_stream_flag & (1 << stream)) == 0) {
864 issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865 last_stream_index = stream;
866 }
867 }
868 } else {
869 if (program->num_streams > 0 &&
870 program->streams[program->num_streams - 1].use_ddmadt) {
871 issue_empty_ddmad = true;
872 }
873 }
874
875 if (direct_writes_needed)
876 issue_empty_ddmad = false;
877
878 if (issue_empty_ddmad) {
879 /* An empty DMA control const (DMA size = 0) is required in case the
880 * last DDMADD is predicated out and last flag does not have any usage.
881 */
882 empty_dma_control_constant64 =
883 pvr_pds_get_bank_based_constants(program->num_streams,
884 &next_constant,
885 2,
886 &consts_size);
887 }
888
889 /* Assign constants for non stream or base instance if there is any
890 * instanced stream.
891 */
892 if (direct_writes_needed || any_instanced_stream ||
893 program->instance_id_modifier) {
894 if (program->iterate_vtx_id) {
895 vertex_id_control_word_const32 =
896 pvr_pds_get_bank_based_constants(program->num_streams,
897 &next_constant,
898 1,
899 &consts_size);
900 }
901
902 if (program->iterate_instance_id || program->instance_id_modifier) {
903 if (program->instance_id_modifier == 0) {
904 instance_id_control_word_const32 =
905 pvr_pds_get_bank_based_constants(program->num_streams,
906 &next_constant,
907 1,
908 &consts_size);
909 } else {
910 instance_id_modifier_word_const32 =
911 pvr_pds_get_bank_based_constants(program->num_streams,
912 &next_constant,
913 1,
914 &consts_size);
915 if ((instance_id_modifier_word_const32 % 2) == 0) {
916 instance_id_control_word_const32 =
917 pvr_pds_get_bank_based_constants(program->num_streams,
918 &next_constant,
919 1,
920 &consts_size);
921 } else {
922 instance_id_control_word_const32 =
923 instance_id_modifier_word_const32;
924 instance_id_modifier_word_const32 =
925 pvr_pds_get_bank_based_constants(program->num_streams,
926 &next_constant,
927 1,
928 &consts_size);
929 }
930 }
931 }
932
933 if (program->base_instance != 0) {
934 base_instance_const32 =
935 pvr_pds_get_bank_based_constants(program->num_streams,
936 &next_constant,
937 1,
938 &consts_size);
939 }
940
941 if (program->iterate_remap_id) {
942 geometry_id_control_word_const64 =
943 pvr_pds_get_bank_based_constants(program->num_streams,
944 &next_constant,
945 2,
946 &consts_size);
947 }
948 }
949
950 if (program->instance_id_modifier != 0) {
951 /* This instanceID modifier is used when a draw array instanced call
952 * sourcing from client data cannot fit into vertex buffer and needs to
953 * be broken down into several draw calls.
954 */
955
956 code_size += 1;
957
958 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959 pvr_pds_write_constant32(buffer,
960 instance_id_modifier_word_const32,
961 program->instance_id_modifier);
962 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963 *buffer++ = pvr_pds_inst_encode_add32(
964 /* cc */ 0x0,
965 /* ALUM */ 0, /* Unsigned */
966 /* SNA */ 0, /* Add */
967 /* SRC0 32b */ instance_id_modifier_word_const32,
968 /* SRC1 32b */ input_register1,
969 /* DST 32b */ input_register1);
970 }
971 }
972
973 /* Adjust instanceID if necessary. */
974 if (any_instanced_stream || program->iterate_instance_id) {
975 if (program->base_instance != 0) {
976 assert(!program->draw_indirect);
977
978 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979 pvr_pds_write_constant32(buffer,
980 base_instance_const32,
981 program->base_instance);
982 }
983
984 base_instance_register = base_instance_const32;
985 }
986
987 if (program->draw_indirect) {
988 assert((program->instance_id_modifier == 0) &&
989 (program->base_instance == 0));
990
991 base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992 }
993 }
994
995 next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996 usc_control_constant64 =
997 pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998
999 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000 bool instance_data_with_base_instance;
1001
1002 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003 ((skip_stream_flag & (1 << stream)) != 0)) {
1004 continue;
1005 }
1006
1007 vertex_stream = &program->streams[stream];
1008
1009 instance_data_with_base_instance =
1010 ((vertex_stream->instance_data) &&
1011 ((program->base_instance > 0) || (program->draw_indirect)));
1012
1013 /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014 * USC constants.
1015 */
1016 if (stream == 0) {
1017 stride_constant32 =
1018 pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019 } else {
1020 next_constant =
1021 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022
1023 /* Skip bank 0. */
1024 stride_constant32 = next_constant + 2;
1025 }
1026
1027 multiplier_constant32 = stride_constant32 + 1;
1028
1029 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030 pvr_pds_write_constant32(buffer,
1031 stride_constant32,
1032 vertex_stream->stride);
1033
1034 /* Vertex stream frequency multiplier. */
1035 if (vertex_stream->multiplier)
1036 pvr_pds_write_constant32(buffer,
1037 multiplier_constant32,
1038 vertex_stream->multiplier);
1039 }
1040
1041 /* Update the code size count and temps count for the above code
1042 * segment.
1043 */
1044 if (vertex_stream->current_state) {
1045 code_size += 1;
1046 temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047 } else {
1048 unsigned int num_temps_required = 0;
1049
1050 if (vertex_stream->multiplier) {
1051 num_temps_required += 2;
1052 code_size += 3;
1053
1054 if (vertex_stream->shift) {
1055 code_size += 1;
1056
1057 if ((int32_t)vertex_stream->shift > 0)
1058 code_size += 1;
1059 }
1060 } else if (vertex_stream->shift) {
1061 code_size += 1;
1062 num_temps_required += 1;
1063 } else if (instance_data_with_base_instance) {
1064 num_temps_required += 1;
1065 }
1066
1067 if (num_temps_required != 0) {
1068 temp = pvr_pds_get_temps(&next_temp,
1069 num_temps_required,
1070 &temps_used); /* 64-bit */
1071 } else {
1072 temp = vertex_stream->instance_data ? input_register1
1073 : input_register0;
1074 }
1075
1076 if (instance_data_with_base_instance)
1077 code_size += 1;
1078 }
1079
1080 /* The real code segment. */
1081 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082 /* If it's current state stream, then index = 0 always. */
1083 if (vertex_stream->current_state) {
1084 /* Put zero in temp. */
1085 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086 } else if (vertex_stream->multiplier) {
1087 /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088 * new: Iout = (Iin * Multiplier) >> (shift+31)
1089 */
1090
1091 /* Put zero in temp. Need zero for add part of the following
1092 * MAD. MAD source is 64 bit, so need two LIMMs.
1093 */
1094 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095 /* Put zero in temp. Need zero for add part of the following
1096 * MAD.
1097 */
1098 *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099
1100 /* old: (Iin * (Multiplier+2^24))
1101 * new: (Iin * Multiplier)
1102 */
1103 *buffer++ = pvr_rogue_inst_encode_mad(
1104 0, /* Sign of add is positive. */
1105 0, /* Unsigned ALU mode */
1106 0, /* Unconditional */
1107 multiplier_constant32,
1108 vertex_stream->instance_data ? input_register1 : input_register0,
1109 temp / 2,
1110 temp / 2);
1111
1112 if (vertex_stream->shift) {
1113 int32_t shift = (int32_t)vertex_stream->shift;
1114
1115 /* new: >> (shift + 31) */
1116 shift += 31;
1117 shift *= -1;
1118
1119 if (shift < -31) {
1120 /* >> (31) */
1121 shift_2s_comp = 0xFFFE1;
1122 *buffer++ = pvr_pds_inst_encode_sftlp64(
1123 /* cc */ 0,
1124 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125 /* IM */ 1, /* enable immediate */
1126 /* SRC0 */ temp / 2,
1127 /* SRC1 */ input_register0, /* This won't be used in
1128 * a shift operation.
1129 */
1130 /* SRC2 (Shift) */ shift_2s_comp,
1131 /* DST */ temp / 2);
1132 shift += 31;
1133 }
1134
1135 /* old: >> (Shift+24)
1136 * new: >> (shift + 31)
1137 */
1138 shift_2s_comp = *((uint32_t *)&shift);
1139 *buffer++ = pvr_pds_inst_encode_sftlp64(
1140 /* cc */ 0,
1141 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142 /* IM */ 1, /*enable immediate */
1143 /* SRC0 */ temp / 2,
1144 /* SRC1 */ input_register0, /* This won't be used in
1145 * a shift operation.
1146 */
1147 /* SRC2 (Shift) */ shift_2s_comp,
1148 /* DST */ temp / 2);
1149 }
1150
1151 if (instance_data_with_base_instance) {
1152 *buffer++ =
1153 pvr_pds_inst_encode_add32(0, /* cc */
1154 0, /* ALNUM */
1155 0, /* SNA */
1156 base_instance_register, /* src0
1157 */
1158 temp, /* src1 */
1159 temp /* dst */
1160 );
1161 }
1162 } else { /* NOT vertex_stream->multiplier */
1163 if (vertex_stream->shift) {
1164 /* Shift Index/InstanceNum Right by shift bits. Put result
1165 * in a Temp.
1166 */
1167
1168 /* 2's complement of shift as this will be a right shift. */
1169 shift_2s_comp = ~(vertex_stream->shift) + 1;
1170
1171 *buffer++ = pvr_pds_inst_encode_sftlp32(
1172 /* IM */ 1, /* enable immediate. */
1173 /* cc */ 0,
1174 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175 /* SRC0 */ vertex_stream->instance_data ? input_register1
1176 : input_register0,
1177 /* SRC1 */ input_register0, /* This won't be used in
1178 * a shift operation.
1179 */
1180 /* SRC2 (Shift) */ shift_2s_comp,
1181 /* DST */ temp);
1182
1183 if (instance_data_with_base_instance) {
1184 *buffer++ =
1185 pvr_pds_inst_encode_add32(0, /* cc */
1186 0, /* ALNUM */
1187 0, /* SNA */
1188 base_instance_register, /* src0
1189 */
1190 temp, /* src1 */
1191 temp /* dst */
1192 );
1193 }
1194 } else {
1195 if (instance_data_with_base_instance) {
1196 *buffer++ =
1197 pvr_pds_inst_encode_add32(0, /* cc */
1198 0, /* ALNUM */
1199 0, /* SNA */
1200 base_instance_register, /* src0
1201 */
1202 input_register1, /* src1 */
1203 temp /* dst */
1204 );
1205 } else {
1206 /* If the shift instruction doesn't happen, use the IR
1207 * directly into the following MAD.
1208 */
1209 temp = vertex_stream->instance_data ? input_register1
1210 : input_register0;
1211 }
1212 }
1213 }
1214 }
1215
1216 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217 if (vertex_stream->use_ddmadt)
1218 ddmadt_enables |= (1 << stream);
1219 } else {
1220 if ((ddmadt_enables & (1 << stream)) != 0) {
1221 /* Emulate what DDMADT does for range checking. */
1222 if (first_ddmadt) {
1223 /* Get an 64 bits temp such that cmp current index with
1224 * allowed vertex number can work.
1225 */
1226 index_temp64 =
1227 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228 */
1229 num_vertices_temp64 =
1230 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231 */
1232
1233 index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234 num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235
1236 code_size += 3;
1237 current_p0 = true;
1238 }
1239
1240 code_size += (temp == pre_index_temp ? 1 : 2);
1241
1242 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243 if (first_ddmadt) {
1244 /* Set predicate to be P0. */
1245 *buffer++ = pvr_pds_encode_bra(
1246 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247 */
1248 0, /* Neg */
1249 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250 */
1251 1); /* Addr */
1252
1253 *buffer++ =
1254 pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255 *buffer++ =
1256 pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257 }
1258
1259 if (temp != pre_index_temp) {
1260 *buffer++ = pvr_pds_inst_encode_sftlp32(
1261 /* IM */ 1, /* enable immediate. */
1262 /* cc */ 0,
1263 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264 /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265 /* SRC1 */ 0,
1266 /* SRC2 (Shift) */ 0,
1267 /* DST */ index_temp64);
1268 }
1269
1270 *buffer++ = pvr_pds_inst_encode_sftlp32(
1271 /* IM */ 1, /* enable immediate. */
1272 /* cc */ 0,
1273 /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274 /* SRC0 */ num_vertices_temp64 + 1,
1275 /* SRC1 */ vertex_stream->num_vertices,
1276 /* SRC2 (Shift) */ 0,
1277 /* DST */ num_vertices_temp64);
1278 }
1279
1280 first_ddmadt = false;
1281
1282 pre_index_temp = temp;
1283 }
1284 }
1285
1286 /* Process the elements in the stream. */
1287 for (uint32_t element = 0; element < vertex_stream->num_elements;
1288 element++) {
1289 bool terminate = false;
1290
1291 vertex_element = &vertex_stream->elements[element];
1292 /* Check if last DDMAD needs terminate or not. */
1293 if ((element == (vertex_stream->num_elements - 1)) &&
1294 (stream == last_stream_index)) {
1295 terminate = !issue_empty_ddmad && !direct_writes_needed;
1296 }
1297
1298 /* Get a new set of constants for this element. */
1299 if (element) {
1300 /* Get all 8 32 bit constants at once. */
1301 next_constant =
1302 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303 }
1304
1305 dma_address_constant64 = next_constant + 4;
1306 dma_control_constant64 = dma_address_constant64 + 2;
1307
1308 if (vertex_element->component_size == 0) {
1309 /* Standard DMA.
1310 *
1311 * Write the DMA transfer control words into the PDS data
1312 * section.
1313 *
1314 * DMA Address is 40-bit.
1315 */
1316
1317 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318 uint32_t dma_control_word;
1319 uint64_t dma_control_word64 = 0;
1320 uint32_t dma_size;
1321
1322 /* Write the address to the constant. */
1323 pvr_pds_write_dma_address(buffer,
1324 dma_address_constant64,
1325 vertex_stream->address +
1326 (uint64_t)vertex_element->offset,
1327 false,
1328 dev_info);
1329 {
1330 if (program->stream_patch_offsets) {
1331 program
1332 ->stream_patch_offsets[program->num_stream_patches++] =
1333 (stream << 16) | (dma_address_constant64 >> 1);
1334 }
1335 }
1336
1337 /* Size is in bytes - round up to nearest 32 bit word. */
1338 dma_size =
1339 (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340 PVR_PDS_DWORD_SHIFT;
1341
1342 assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343
1344 /* Set up the dma transfer control word. */
1345 dma_control_word =
1346 dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347
1348 dma_control_word |=
1349 vertex_element->reg
1350 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351
1352 dma_control_word |=
1353 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355
1356 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357 if ((ddmadt_enables & (1 << stream)) != 0) {
1358 assert(
1359 ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363 (uint64_t)vertex_stream->buffer_size_in_bytes);
1364 dma_control_word64 =
1365 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366 (((uint64_t)vertex_stream->buffer_size_in_bytes
1367 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369 }
1370 }
1371 /* If this is the last dma then also set the last flag. */
1372 if (terminate) {
1373 dma_control_word |=
1374 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375 }
1376
1377 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378 * spec.
1379 */
1380 pvr_pds_write_wide_constant(buffer,
1381 dma_control_constant64,
1382 dma_control_word64 |
1383 (uint64_t)dma_control_word);
1384 }
1385
1386 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388 if ((ddmadt_enables & (1 << stream)) != 0) {
1389 *buffer++ = pvr_pds_inst_encode_cmp(
1390 0, /* cc enable */
1391 PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392 index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393 (num_vertices_temp64 >> 1) +
1394 PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395 (REGS64)
1396 */
1397 }
1398 }
1399 /* Multiply by the vertex stream stride and add the base
1400 * followed by a DOUTD.
1401 *
1402 * dmad32 (C0 * T0) + C1, C2
1403 * src0 = stride src1 = index src2 = baseaddr src3 =
1404 * doutd part
1405 */
1406
1407 uint32_t cc;
1408 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409 cc = 0;
1410 else
1411 cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412
1413 *buffer++ = pvr_pds_inst_encode_ddmad(
1414 /* cc */ cc,
1415 /* END */ 0,
1416 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417 /* SRC1 */ temp, /* Index 32-bit*/
1418 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419 * Address
1420 * +
1421 * Offset
1422 */
1423 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424 * Transfer
1425 * Control
1426 * Word.
1427 */
1428 );
1429 }
1430
1431 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432 ((ddmadt_enables & (1 << stream)) != 0)) {
1433 code_size += 1;
1434 }
1435 code_size += 1;
1436 } else {
1437 /* Repeat DMA.
1438 *
1439 * Write the DMA transfer control words into the PDS data
1440 * section.
1441 *
1442 * DMA address is 40-bit.
1443 */
1444
1445 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446 uint32_t dma_control_word;
1447
1448 /* Write the address to the constant. */
1449 pvr_pds_write_dma_address(buffer,
1450 dma_address_constant64,
1451 vertex_stream->address +
1452 (uint64_t)vertex_element->offset,
1453 false,
1454 dev_info);
1455
1456 /* Set up the DMA transfer control word. */
1457 dma_control_word =
1458 vertex_element->size
1459 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460
1461 dma_control_word |=
1462 vertex_element->reg
1463 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464
1465 switch (vertex_element->component_size) {
1466 case 4: {
1467 dma_control_word |=
1468 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469 break;
1470 }
1471 case 3: {
1472 dma_control_word |=
1473 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474 break;
1475 }
1476 case 2: {
1477 dma_control_word |=
1478 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479 break;
1480 }
1481 default: {
1482 dma_control_word |=
1483 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484 break;
1485 }
1486 }
1487
1488 dma_control_word |=
1489 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490
1491 dma_control_word |=
1492 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494
1495 /* If this is the last dma then also set the last flag. */
1496 if (terminate) {
1497 dma_control_word |=
1498 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499 }
1500
1501 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502 * spec.
1503 */
1504 pvr_pds_write_wide_constant(buffer,
1505 dma_control_constant64,
1506 (uint64_t)dma_control_word);
1507 }
1508
1509 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510 /* Multiply by the vertex stream stride and add the base
1511 * followed by a DOUTD.
1512 *
1513 * dmad32 (C0 * T0) + C1, C2
1514 * src0 = stride src1 = index src2 = baseaddr src3 =
1515 * doutd part
1516 */
1517 *buffer++ = pvr_pds_inst_encode_ddmad(
1518 /* cc */ 0,
1519 /* END */ 0,
1520 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521 /* SRC1 */ temp, /* Index 32-bit*/
1522 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523 * Address
1524 * +
1525 * Offset.
1526 */
1527 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528 * Transfer
1529 * Control
1530 * Word.
1531 */
1532 );
1533 }
1534
1535 code_size += 1;
1536 } /* End of repeat DMA. */
1537 } /* Element loop */
1538 } /* Stream loop */
1539
1540 if (issue_empty_ddmad) {
1541 /* Issue an empty last DDMAD, always executed. */
1542 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543 pvr_pds_write_wide_constant(
1544 buffer,
1545 empty_dma_control_constant64,
1546 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547 }
1548
1549 code_size += 1;
1550
1551 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552 *buffer++ = pvr_pds_inst_encode_ddmad(
1553 /* cc */ 0,
1554 /* END */ 0,
1555 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556 /* SRC1 */ temp, /* Index 32-bit*/
1557 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558 *Address +
1559 *Offset.
1560 */
1561 /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562 * Transfer
1563 * Control
1564 * Word.
1565 */
1566 );
1567 }
1568 }
1569
1570 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571 if (current_p0) {
1572 code_size += 1;
1573
1574 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575 /* Revert predicate back to IF0 which is required by DOUTU. */
1576 *buffer++ =
1577 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578 */
1579 0, /* Neg */
1580 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581 */
1582 1); /* Addr */
1583 }
1584 }
1585 }
1586 /* Send VertexID if requested. */
1587 if (program->iterate_vtx_id) {
1588 if (program->draw_indirect) {
1589 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590 *buffer++ = pvr_pds_inst_encode_add32(
1591 /* cc */ 0x0,
1592 /* ALUM */ 0, /* Unsigned */
1593 /* SNA */ 1, /* Minus */
1594 /* SRC0 32b */ input_register0, /* vertexID */
1595 /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596 * vertexID.
1597 */
1598 /* DST 32b */ input_register0);
1599 }
1600
1601 code_size += 1;
1602 }
1603
1604 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605 uint32_t doutw = pvr_pds_encode_doutw_src1(
1606 program->vtx_id_register,
1607 PVR_PDS_DOUTW_LOWER32,
1608 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609 false,
1610 dev_info);
1611
1612 if (!program->iterate_instance_id && !program->iterate_remap_id)
1613 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614
1615 pvr_pds_write_constant32(buffer,
1616 vertex_id_control_word_const32,
1617 doutw);
1618 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619 *buffer++ = pvr_pds_encode_doutw64(
1620 /* cc */ 0,
1621 /* END */ 0,
1622 /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623 */
1624 /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625 }
1626
1627 code_size += 1;
1628 }
1629
1630 /* Send InstanceID if requested. */
1631 if (program->iterate_instance_id) {
1632 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633 uint32_t doutw = pvr_pds_encode_doutw_src1(
1634 program->instance_id_register,
1635 PVR_PDS_DOUTW_UPPER32,
1636 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637 true,
1638 dev_info);
1639
1640 if (!program->iterate_remap_id)
1641 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642
1643 pvr_pds_write_constant32(buffer,
1644 instance_id_control_word_const32,
1645 doutw);
1646 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647 *buffer++ = pvr_pds_encode_doutw64(
1648 /* cc */ 0,
1649 /* END */ 0,
1650 /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651 /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652 }
1653
1654 code_size += 1;
1655 }
1656
1657 /* Send remapped index number to vi0. */
1658 if (program->iterate_remap_id) {
1659 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660 uint32_t doutw = pvr_pds_encode_doutw_src1(
1661 0 /* vi0 */,
1662 PVR_PDS_DOUTW_LOWER32,
1663 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665 false,
1666 dev_info);
1667
1668 pvr_pds_write_constant64(buffer,
1669 geometry_id_control_word_const64,
1670 doutw,
1671 0);
1672 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673 *buffer++ = pvr_pds_encode_doutw64(
1674 /* cc */ 0,
1675 /* END */ 0,
1676 /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677 * Src1
1678 */
1679 /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680 }
1681
1682 code_size += 1;
1683 }
1684
1685 /* Copy the USC task control words to constants. */
1686 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687 pvr_pds_write_wide_constant(buffer,
1688 usc_control_constant64,
1689 program->usc_task_control.src0); /* 64-bit
1690 * Src0
1691 */
1692 if (program->stream_patch_offsets) {
1693 /* USC TaskControl is always the first patch. */
1694 program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695 }
1696 }
1697
1698 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699 /* Conditionally (if last in task) issue the task to the USC
1700 * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701 */
1702
1703 *buffer++ = pvr_pds_encode_doutu(
1704 /* cc */ 1,
1705 /* END */ 1,
1706 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707
1708 /* End the program if the Dout did not already end it. */
1709 *buffer++ = pvr_pds_inst_encode_halt(0);
1710 }
1711
1712 code_size += 2;
1713
1714 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715 /* Set the data segment pointer and ensure we return 1 past the buffer
1716 * ptr.
1717 */
1718 program->data_segment = buffer;
1719
1720 buffer += consts_size;
1721 }
1722
1723 program->temps_used = temps_used;
1724 program->data_size = consts_size;
1725 program->code_size = code_size;
1726 program->ddmadt_enables = ddmadt_enables;
1727 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728 program->skip_stream_flag = skip_stream_flag;
1729
1730 return buffer;
1731 }
1732
1733 /**
1734 * Generates a PDS program to load USC compute shader global/local/workgroup
1735 * sizes/ids and then a DOUTU to execute the USC.
1736 *
1737 * \param program Pointer to description of the program that should be
1738 * generated.
1739 * \param buffer Pointer to buffer that receives the output of this function.
1740 * This will be either the data segment, or the code depending on
1741 * gen_mode.
1742 * \param gen_mode Which part to generate, either data segment or code segment.
1743 * If PDS_GENERATE_SIZES is specified, nothing is written, but
1744 * size information in program is updated.
1745 * \param dev_info PVR device info struct.
1746 * \returns Pointer to just beyond the buffer for the data - i.e. the value of
1747 * the buffer after writing its contents.
1748 */
1749 uint32_t *
pvr_pds_compute_shader(struct pvr_pds_compute_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)1750 pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751 uint32_t *restrict buffer,
1752 enum pvr_pds_generate_mode gen_mode,
1753 const struct pvr_device_info *dev_info)
1754 {
1755 uint32_t usc_control_constant64;
1756 uint32_t usc_control_constant64_coeff_update = 0;
1757 uint32_t zero_constant64 = 0;
1758
1759 uint32_t data_size = 0;
1760 uint32_t code_size = 0;
1761 uint32_t temps_used = 0;
1762 uint32_t doutw = 0;
1763
1764 uint32_t barrier_ctrl_word = 0;
1765 uint32_t barrier_ctrl_word2 = 0;
1766
1767 /* Even though there are 3 IDs for local and global we only need max one
1768 * DOUTW for local, and two for global.
1769 */
1770 uint32_t work_group_id_ctrl_words[2] = { 0 };
1771 uint32_t local_id_ctrl_word = 0;
1772 uint32_t local_input_register;
1773
1774 /* For the constant value to load into ptemp (SW fence). */
1775 uint64_t predicate_ld_src0_constant = 0;
1776 uint32_t cond_render_negate_constant = 0;
1777
1778 uint32_t cond_render_pred_temp;
1779 uint32_t cond_render_negate_temp;
1780
1781 /* 2x 64 bit registers that will mask out the Predicate load. */
1782 uint32_t cond_render_pred_mask_constant = 0;
1783
1784 #if MESA_DEBUG
1785 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786 for (uint32_t j = 0; j < program->data_size; j++)
1787 buffer[j] = 0xDEADBEEF;
1788 }
1789 #endif
1790
1791 /* All the compute input registers are in temps. */
1792 temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793
1794 uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795
1796 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797
1798 if (program->kick_usc) {
1799 /* Copy the USC task control words to constants. */
1800 usc_control_constant64 =
1801 pvr_pds_get_constants(&next_constant, 2, &data_size);
1802 }
1803
1804 if (program->has_coefficient_update_task) {
1805 usc_control_constant64_coeff_update =
1806 pvr_pds_get_constants(&next_constant, 2, &data_size);
1807 }
1808
1809 if (program->conditional_render) {
1810 predicate_ld_src0_constant =
1811 pvr_pds_get_constants(&next_constant, 2, &data_size);
1812 cond_render_negate_constant =
1813 pvr_pds_get_constants(&next_constant, 2, &data_size);
1814 cond_render_pred_mask_constant =
1815 pvr_pds_get_constants(&next_constant, 4, &data_size);
1816
1817 /* LD will load a 64 bit value. */
1818 cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819 cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820
1821 program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822 program->cond_render_pred_temp = cond_render_pred_temp;
1823 }
1824
1825 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1826 (program->clear_pds_barrier) ||
1827 (program->kick_usc && program->conditional_render)) {
1828 zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829 }
1830
1831 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1832 barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834 barrier_ctrl_word2 =
1835 pvr_pds_get_constants(&next_constant, 1, &data_size);
1836 }
1837 }
1838
1839 if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
1840 program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1841 work_group_id_ctrl_words[0] =
1842 pvr_pds_get_constants(&next_constant, 1, &data_size);
1843 }
1844
1845 if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1846 work_group_id_ctrl_words[1] =
1847 pvr_pds_get_constants(&next_constant, 1, &data_size);
1848 }
1849
1850 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1851 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1852 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1853 local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854 }
1855
1856 if (program->add_base_workgroup) {
1857 for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858 workgroup_component++) {
1859 if (program->work_group_input_regs[workgroup_component] !=
1860 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1861 program
1862 ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863 pvr_pds_get_constants(&next_constant, 1, &data_size);
1864 }
1865 }
1866 }
1867
1868 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869 if (program->kick_usc) {
1870 /* Src0 for DOUTU */
1871 pvr_pds_write_wide_constant(buffer,
1872 usc_control_constant64,
1873 program->usc_task_control.src0); /* 64-bit
1874 * Src0.
1875 */
1876 }
1877
1878 if (program->has_coefficient_update_task) {
1879 /* Src0 for DOUTU. */
1880 pvr_pds_write_wide_constant(
1881 buffer,
1882 usc_control_constant64_coeff_update,
1883 program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884 }
1885
1886 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1887 (program->clear_pds_barrier) ||
1888 (program->kick_usc && program->conditional_render)) {
1889 pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890 * Src0
1891 */
1892 }
1893
1894 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1895 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896 /* Write the constant for the coefficient register write. */
1897 doutw = pvr_pds_encode_doutw_src1(
1898 program->barrier_coefficient + 4,
1899 PVR_PDS_DOUTW_LOWER64,
1900 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901 true,
1902 dev_info);
1903 pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904 }
1905 /* Write the constant for the coefficient register write. */
1906 doutw = pvr_pds_encode_doutw_src1(
1907 program->barrier_coefficient,
1908 PVR_PDS_DOUTW_LOWER64,
1909 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910 true,
1911 dev_info);
1912
1913 /* Check whether the barrier is going to be the last DOUTW done by
1914 * the coefficient sync task.
1915 */
1916 if ((program->work_group_input_regs[0] ==
1917 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1918 (program->work_group_input_regs[1] ==
1919 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1920 (program->work_group_input_regs[2] ==
1921 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1922 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1923 }
1924
1925 pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1926 }
1927
1928 /* If we want work-group id X, see if we also want work-group id Y. */
1929 if (program->work_group_input_regs[0] !=
1930 PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
1931 program->work_group_input_regs[1] !=
1932 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1933 /* Make sure we are going to DOUTW them into adjacent registers
1934 * otherwise we can't do it in one.
1935 */
1936 assert(program->work_group_input_regs[1] ==
1937 (program->work_group_input_regs[0] + 1));
1938
1939 doutw = pvr_pds_encode_doutw_src1(
1940 program->work_group_input_regs[0],
1941 PVR_PDS_DOUTW_LOWER64,
1942 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1943 true,
1944 dev_info);
1945
1946 /* If we don't want the Z work-group id then this is the last one.
1947 */
1948 if (program->work_group_input_regs[2] ==
1949 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1950 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1951 }
1952
1953 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1954 }
1955 /* If we only want one of X or Y then handle them separately. */
1956 else {
1957 if (program->work_group_input_regs[0] !=
1958 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1959 doutw = pvr_pds_encode_doutw_src1(
1960 program->work_group_input_regs[0],
1961 PVR_PDS_DOUTW_LOWER32,
1962 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1963 true,
1964 dev_info);
1965
1966 /* If we don't want the Z work-group id then this is the last
1967 * one.
1968 */
1969 if (program->work_group_input_regs[2] ==
1970 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1971 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1972 }
1973
1974 pvr_pds_write_constant32(buffer,
1975 work_group_id_ctrl_words[0],
1976 doutw);
1977 } else if (program->work_group_input_regs[1] !=
1978 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1979 doutw = pvr_pds_encode_doutw_src1(
1980 program->work_group_input_regs[1],
1981 PVR_PDS_DOUTW_UPPER32,
1982 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1983 true,
1984 dev_info);
1985
1986 /* If we don't want the Z work-group id then this is the last
1987 * one.
1988 */
1989 if (program->work_group_input_regs[2] ==
1990 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1991 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1992 }
1993
1994 pvr_pds_write_constant32(buffer,
1995 work_group_id_ctrl_words[0],
1996 doutw);
1997 }
1998 }
1999
2000 /* Handle work-group id Z. */
2001 if (program->work_group_input_regs[2] !=
2002 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2003 doutw = pvr_pds_encode_doutw_src1(
2004 program->work_group_input_regs[2],
2005 PVR_PDS_DOUTW_UPPER32,
2006 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
2007 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2008 true,
2009 dev_info);
2010
2011 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
2012 }
2013
2014 /* Handle the local IDs. */
2015 if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2016 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2017 uint32_t dest_reg;
2018
2019 /* If we want local id Y and Z make sure the compiler wants them in
2020 * the same register.
2021 */
2022 if (!program->flattened_work_groups) {
2023 if ((program->local_input_regs[1] !=
2024 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2025 (program->local_input_regs[2] !=
2026 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2027 assert(program->local_input_regs[1] ==
2028 program->local_input_regs[2]);
2029 }
2030 }
2031
2032 if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2033 dest_reg = program->local_input_regs[1];
2034 else
2035 dest_reg = program->local_input_regs[2];
2036
2037 /* If we want local id X and (Y or Z) then we can do that in a
2038 * single 64-bit DOUTW.
2039 */
2040 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2041 assert(dest_reg == (program->local_input_regs[0] + 1));
2042
2043 doutw = pvr_pds_encode_doutw_src1(
2044 program->local_input_regs[0],
2045 PVR_PDS_DOUTW_LOWER64,
2046 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2047 true,
2048 dev_info);
2049
2050 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2051
2052 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2053 }
2054 /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2055 */
2056 else {
2057 doutw = pvr_pds_encode_doutw_src1(
2058 dest_reg,
2059 PVR_PDS_DOUTW_UPPER32,
2060 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2061 true,
2062 dev_info);
2063
2064 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2065
2066 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2067 }
2068 }
2069 /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2070 */
2071 else if (program->local_input_regs[0] !=
2072 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2073 doutw = pvr_pds_encode_doutw_src1(
2074 program->local_input_regs[0],
2075 PVR_PDS_DOUTW_LOWER32,
2076 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2077 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2078 true,
2079 dev_info);
2080
2081 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2082 }
2083 }
2084
2085 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2086 gen_mode == PDS_GENERATE_SIZES) {
2087 const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2088 #define APPEND(X) \
2089 if (encode) { \
2090 *buffer = X; \
2091 buffer++; \
2092 } else { \
2093 code_size += sizeof(uint32_t); \
2094 }
2095
2096 /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2097 * then we will be doing an infinite loop.
2098 */
2099 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2100 assert(program->coeff_update_task_branch_size > 0);
2101
2102 /* Test whether this is the coefficient update task or not. */
2103 APPEND(
2104 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2105 PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2106 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2107 program->coeff_update_task_branch_size /* ADDR */));
2108
2109 /* Do we need to initialize the barrier coefficient? */
2110 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2111 if (PVR_HAS_QUIRK(dev_info, 51210)) {
2112 /* Initialize the second barrier coefficient registers to zero.
2113 */
2114 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2115 0, /* END */
2116 barrier_ctrl_word2, /* SRC1 */
2117 zero_constant64 >> 1)); /* SRC0 */
2118 }
2119 /* Initialize the coefficient register to zero. */
2120 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2121 0, /* END */
2122 barrier_ctrl_word, /* SRC1 */
2123 zero_constant64 >> 1)); /* SRC0 */
2124 }
2125
2126 if (program->add_base_workgroup) {
2127 const uint32_t temp_values[3] = { 0, 1, 3 };
2128 for (uint32_t workgroup_component = 0; workgroup_component < 3;
2129 workgroup_component++) {
2130 if (program->work_group_input_regs[workgroup_component] ==
2131 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2132 continue;
2133
2134 APPEND(pvr_pds_inst_encode_add32(
2135 /* cc */ 0x0,
2136 /* ALUM */ 0,
2137 /* SNA */ 0,
2138 /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2139 program->base_workgroup_constant_offset_in_dwords
2140 [workgroup_component],
2141 /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2142 PVR_PDS_CDM_WORK_GROUP_ID_X +
2143 temp_values[workgroup_component],
2144 /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2145 PVR_PDS_CDM_WORK_GROUP_ID_X +
2146 temp_values[workgroup_component]));
2147 }
2148 }
2149
2150 /* If we are going to put the work-group IDs in coefficients then we
2151 * just need to do the DOUTWs.
2152 */
2153 if ((program->work_group_input_regs[0] !=
2154 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2155 (program->work_group_input_regs[1] !=
2156 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2157 uint32_t dest_reg;
2158
2159 if (program->work_group_input_regs[0] !=
2160 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2161 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2162 } else {
2163 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2164 }
2165
2166 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2167 0, /* END */
2168 work_group_id_ctrl_words[0], /* SRC1
2169 */
2170 dest_reg >> 1)); /* SRC0 */
2171 }
2172
2173 if (program->work_group_input_regs[2] !=
2174 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2175 APPEND(pvr_pds_encode_doutw64(
2176 0, /* cc */
2177 0, /* END */
2178 work_group_id_ctrl_words[1], /* SRC1 */
2179 (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2180 1)); /* SRC0 */
2181 }
2182
2183 /* Issue the task to the USC. */
2184 if (program->kick_usc && program->has_coefficient_update_task) {
2185 APPEND(pvr_pds_encode_doutu(0, /* cc */
2186 1, /* END */
2187 usc_control_constant64_coeff_update >>
2188 1)); /* SRC0; DOUTU 64-bit Src0 */
2189 }
2190
2191 /* Encode a HALT */
2192 APPEND(pvr_pds_inst_encode_halt(0));
2193
2194 /* Set the branch size used to skip the coefficient sync task. */
2195 program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2196
2197 /* DOUTW in the local IDs. */
2198
2199 /* If we want X and Y or Z, we only need one DOUTW. */
2200 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2201 ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2202 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
2203 local_input_register =
2204 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2205 } else {
2206 /* If we just want X. */
2207 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2208 local_input_register =
2209 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2210 }
2211 /* If we just want Y or Z. */
2212 else if (program->local_input_regs[1] !=
2213 PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
2214 program->local_input_regs[2] !=
2215 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2216 local_input_register =
2217 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2218 }
2219 }
2220
2221 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2222 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2223 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2224 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2225 0, /* END */
2226 local_id_ctrl_word, /* SRC1 */
2227 local_input_register >> 1)); /* SRC0
2228 */
2229 }
2230
2231 if (program->clear_pds_barrier) {
2232 /* Zero the persistent temp (SW fence for context switch). */
2233 APPEND(pvr_pds_inst_encode_add64(
2234 0, /* cc */
2235 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2236 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2237 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2238 (zero_constant64 >> 1), /* src0 = 0 */
2239 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2240 (zero_constant64 >> 1), /* src1 = 0 */
2241 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2242 * ptemp64[0]
2243 */
2244 }
2245
2246 /* If this is a fence, issue the DOUTC. */
2247 if (program->fence) {
2248 APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2249 0 /* END */));
2250 }
2251
2252 if (program->kick_usc) {
2253 if (program->conditional_render) {
2254 /* Skip if coefficient update task. */
2255 APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2256 0,
2257 PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2258 16));
2259
2260 /* Load the predicate. */
2261 APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2262
2263 /* Load negate constant into temp for CMP. */
2264 APPEND(pvr_pds_inst_encode_add64(
2265 0, /* cc */
2266 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2267 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2268 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2269 (cond_render_negate_constant >> 1), /* src0 = 0 */
2270 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2271 (zero_constant64 >> 1), /* src1 = 0 */
2272 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2273 (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2274 */
2275
2276 APPEND(pvr_pds_inst_encode_wdf(0));
2277
2278 for (uint32_t i = 0; i < 4; i++) {
2279 APPEND(pvr_pds_inst_encode_sftlp32(
2280 1, /* enable immediate */
2281 0, /* cc */
2282 PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2283 cond_render_pred_temp + i, /* SRC0 */
2284 cond_render_pred_mask_constant + i, /* SRC1 */
2285 0, /* SRC2 (Shift) */
2286 cond_render_pred_temp + i)); /* DST */
2287
2288 APPEND(
2289 pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2290 0, /* cc */
2291 PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2292 */
2293 cond_render_pred_temp + i, /* SRC0
2294 */
2295 cond_render_pred_temp, /* SRC1 */
2296 0, /* SRC2 (Shift) */
2297 cond_render_pred_temp)); /* DST */
2298 }
2299
2300 APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2301 cond_render_pred_temp + 1, /* SRC1
2302 */
2303 0, /* SRC0 */
2304 0)); /* GLOBALREG */
2305
2306 APPEND(pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2307 0, /* cc */
2308 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2309 */
2310 cond_render_pred_temp, /* SRC0 */
2311 cond_render_negate_temp, /* SRC1
2312 */
2313 0, /* SRC2 (Shift) */
2314 cond_render_pred_temp)); /* DST
2315 */
2316
2317 /* Check that the predicate is 0. */
2318 APPEND(pvr_pds_inst_encode_cmpi(
2319 0, /* cc */
2320 PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2321 (cond_render_pred_temp >> 1) +
2322 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2323 0)); /* SRC1 */
2324
2325 /* If predicate is 0, skip DOUTU. */
2326 APPEND(pvr_pds_inst_encode_bra(
2327 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2328 P0 */
2329 0, /* NEG */
2330 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2331 keep
2332 */
2333 2));
2334 }
2335
2336 /* Issue the task to the USC.
2337 * DoutU src1=USC Code Base address, src2=doutu word 2.
2338 */
2339 APPEND(pvr_pds_encode_doutu(1, /* cc */
2340 1, /* END */
2341 usc_control_constant64 >> 1)); /* SRC0;
2342 * DOUTU
2343 * 64-bit
2344 * Src0.
2345 */
2346 }
2347
2348 /* End the program if the Dout did not already end it. */
2349 APPEND(pvr_pds_inst_encode_halt(0));
2350 #undef APPEND
2351 }
2352
2353 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2354 /* Set the data segment pointer and ensure we return 1 past the buffer
2355 * ptr.
2356 */
2357 program->data_segment = buffer;
2358
2359 buffer += next_constant;
2360 }
2361
2362 /* Require at least one DWORD of PDS data so the program runs. */
2363 data_size = MAX2(1, data_size);
2364
2365 program->temps_used = temps_used;
2366 program->highest_temp = temps_used;
2367 program->data_size = data_size;
2368 if (gen_mode == PDS_GENERATE_SIZES)
2369 program->code_size = code_size;
2370
2371 return buffer;
2372 }
2373
2374 /**
2375 * Generates the PDS vertex shader data or code block. This program will do a
2376 * DMA into USC Constants followed by a DOUTU.
2377 *
2378 * \param program Pointer to the PDS vertex shader program.
2379 * \param buffer Pointer to the buffer for the program.
2380 * \param gen_mode Generate code or data.
2381 * \param dev_info PVR device information struct.
2382 * \returns Pointer to just beyond the code/data.
2383 */
pvr_pds_vertex_shader_sa(struct pvr_pds_vertex_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)2384 uint32_t *pvr_pds_vertex_shader_sa(
2385 struct pvr_pds_vertex_shader_sa_program *restrict program,
2386 uint32_t *restrict buffer,
2387 enum pvr_pds_generate_mode gen_mode,
2388 const struct pvr_device_info *dev_info)
2389 {
2390 uint32_t next_constant;
2391 uint32_t data_size = 0;
2392 uint32_t code_size = 0;
2393
2394 uint32_t usc_control_constant64 = 0;
2395 uint32_t dma_address_constant64 = 0;
2396 uint32_t dma_control_constant32 = 0;
2397 uint32_t doutw_value_constant64 = 0;
2398 uint32_t doutw_control_constant32 = 0;
2399 uint32_t fence_constant_word = 0;
2400 uint32_t *buffer_base;
2401 uint32_t kick_index;
2402
2403 uint32_t total_num_doutw =
2404 program->num_dword_doutw + program->num_q_word_doutw;
2405 uint32_t total_size_dma =
2406 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2407
2408 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2409
2410 /* Copy the DMA control words and USC task control words to constants.
2411 *
2412 * Arrange them so that the 64-bit words are together followed by the 32-bit
2413 * words.
2414 */
2415 if (program->kick_usc) {
2416 usc_control_constant64 =
2417 pvr_pds_get_constants(&next_constant, 2, &data_size);
2418 }
2419
2420 if (program->clear_pds_barrier) {
2421 fence_constant_word =
2422 pvr_pds_get_constants(&next_constant, 2, &data_size);
2423 }
2424 dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2425 2 * program->num_dma_kicks,
2426 &data_size);
2427
2428 /* Assign all unaligned constants together to avoid alignment issues caused
2429 * by pvr_pds_get_constants with even allocation sizes.
2430 */
2431 doutw_value_constant64 = pvr_pds_get_constants(
2432 &next_constant,
2433 total_size_dma + total_num_doutw + program->num_dma_kicks,
2434 &data_size);
2435 doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2436 dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2437
2438 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2439 buffer_base = buffer;
2440
2441 if (program->kick_usc) {
2442 /* Src0 for DOUTU. */
2443 pvr_pds_write_wide_constant(buffer_base,
2444 usc_control_constant64,
2445 program->usc_task_control.src0); /* DOUTU
2446 * 64-bit
2447 * Src0.
2448 */
2449 buffer += 2;
2450 }
2451
2452 if (program->clear_pds_barrier) {
2453 /* Encode the fence constant src0. Fence barrier is initialized to
2454 * zero.
2455 */
2456 pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2457 buffer += 2;
2458 }
2459
2460 if (total_num_doutw > 0) {
2461 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2462 /* Write the constant for the coefficient register write. */
2463 pvr_pds_write_constant64(buffer_base,
2464 doutw_value_constant64,
2465 program->q_word_doutw_value[2 * i],
2466 program->q_word_doutw_value[2 * i + 1]);
2467 pvr_pds_write_constant32(
2468 buffer_base,
2469 doutw_control_constant32,
2470 program->q_word_doutw_control[i] |
2471 ((!program->num_dma_kicks && i == total_num_doutw - 1)
2472 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2473 : 0));
2474
2475 doutw_value_constant64 += 2;
2476 doutw_control_constant32 += 1;
2477 }
2478
2479 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2480 /* Write the constant for the coefficient register write. */
2481 pvr_pds_write_constant32(buffer_base,
2482 doutw_value_constant64,
2483 program->dword_doutw_value[i]);
2484 pvr_pds_write_constant32(
2485 buffer_base,
2486 doutw_control_constant32,
2487 program->dword_doutw_control[i] |
2488 ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2489 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2490 : 0));
2491
2492 doutw_value_constant64 += 1;
2493 doutw_control_constant32 += 1;
2494 }
2495
2496 buffer += total_size_dma + total_num_doutw;
2497 }
2498
2499 if (program->num_dma_kicks == 1) /* Most-common case. */
2500 {
2501 /* Src0 for DOUTD - Address. */
2502 pvr_pds_write_dma_address(buffer_base,
2503 dma_address_constant64,
2504 program->dma_address[0],
2505 false,
2506 dev_info);
2507
2508 /* Src1 for DOUTD - Control Word. */
2509 pvr_pds_write_constant32(
2510 buffer_base,
2511 dma_control_constant32,
2512 program->dma_control[0] |
2513 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2514
2515 /* Move the buffer ptr along as we will return 1 past the buffer. */
2516 buffer += 3;
2517 } else if (program->num_dma_kicks > 1) {
2518 for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2519 kick_index++) {
2520 /* Src0 for DOUTD - Address. */
2521 pvr_pds_write_dma_address(buffer_base,
2522 dma_address_constant64,
2523 program->dma_address[kick_index],
2524 false,
2525 dev_info);
2526
2527 /* Src1 for DOUTD - Control Word. */
2528 pvr_pds_write_constant32(buffer_base,
2529 dma_control_constant32,
2530 program->dma_control[kick_index]);
2531 dma_address_constant64 += 2;
2532 dma_control_constant32 += 1;
2533 }
2534
2535 /* Src0 for DOUTD - Address. */
2536 pvr_pds_write_dma_address(buffer_base,
2537 dma_address_constant64,
2538 program->dma_address[kick_index],
2539 false,
2540 dev_info);
2541
2542 /* Src1 for DOUTD - Control Word. */
2543 pvr_pds_write_constant32(
2544 buffer_base,
2545 dma_control_constant32,
2546 program->dma_control[kick_index] |
2547 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2548
2549 buffer += 3 * program->num_dma_kicks;
2550 }
2551 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2552 if (program->clear_pds_barrier) {
2553 /* Zero the persistent temp (SW fence for context switch). */
2554 *buffer++ = pvr_pds_inst_encode_add64(
2555 0, /* cc */
2556 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2557 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2558 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2559 (fence_constant_word >> 1), /* src0 = 0 */
2560 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2561 (fence_constant_word >> 1), /* src1 = 0 */
2562 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2563 * ptemp[0]
2564 */
2565 }
2566
2567 if (total_num_doutw > 0) {
2568 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2569 /* Set the coefficient register to data value. */
2570 *buffer++ = pvr_pds_encode_doutw64(
2571 /* cc */ 0,
2572 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2573 (i == total_num_doutw - 1),
2574 /* SRC1 */ doutw_control_constant32,
2575 /* SRC0 */ doutw_value_constant64 >> 1);
2576
2577 doutw_value_constant64 += 2;
2578 doutw_control_constant32 += 1;
2579 }
2580
2581 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2582 /* Set the coefficient register to data value. */
2583 *buffer++ = pvr_pds_encode_doutw64(
2584 /* cc */ 0,
2585 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2586 (i == program->num_dword_doutw - 1),
2587 /* SRC1 */ doutw_control_constant32,
2588 /* SRC0 */ doutw_value_constant64 >> 1);
2589
2590 doutw_value_constant64 += 1;
2591 doutw_control_constant32 += 1;
2592 }
2593 }
2594
2595 if (program->num_dma_kicks != 0) {
2596 /* DMA the state into the secondary attributes. */
2597
2598 if (program->num_dma_kicks == 1) /* Most-common case. */
2599 {
2600 *buffer++ = pvr_pds_encode_doutd(
2601 /* cc */ 0,
2602 /* END */ !program->kick_usc,
2603 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2604 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2605 * Src0.
2606 */
2607 } else {
2608 for (kick_index = 0; kick_index < program->num_dma_kicks;
2609 kick_index++) {
2610 *buffer++ = pvr_pds_encode_doutd(
2611 /* cc */ 0,
2612 /* END */ (!program->kick_usc) &&
2613 (kick_index + 1 == program->num_dma_kicks),
2614 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2615 * Src1.
2616 */
2617 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2618 * 64-bit
2619 * Src0.
2620 */
2621 dma_address_constant64 += 2;
2622 dma_control_constant32 += 1;
2623 }
2624 }
2625 }
2626
2627 if (program->kick_usc) {
2628 /* Kick the USC. */
2629 *buffer++ = pvr_pds_encode_doutu(
2630 /* cc */ 0,
2631 /* END */ 1,
2632 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2633 */
2634 }
2635
2636 if (!program->kick_usc && program->num_dma_kicks == 0 &&
2637 total_num_doutw == 0) {
2638 *buffer++ = pvr_pds_inst_encode_halt(0);
2639 }
2640 }
2641
2642 code_size = program->num_dma_kicks + total_num_doutw;
2643 if (program->clear_pds_barrier)
2644 code_size++; /* ADD64 instruction. */
2645
2646 if (program->kick_usc)
2647 code_size++;
2648
2649 /* If there are no DMAs and no USC kick then code is HALT only. */
2650 if (code_size == 0)
2651 code_size = 1;
2652
2653 program->data_size = data_size;
2654 program->code_size = code_size;
2655
2656 return buffer;
2657 }
2658
/**
 * Generates the code segment for the PDS pixel shader secondary attributes
 * (uniform/texture) program, or only updates its sizes.
 *
 * \param program Pointer to the PDS pixel shader secondary attributes program.
 * \param buffer Pointer to the buffer for the code.
 * \param gen_mode Either PDS_GENERATE_CODE_SEGMENT to generate code or
 *                 PDS_GENERATE_SIZES to only update sizes.
 * \returns Pointer to just beyond the buffer for the program/data.
 */
pvr_pds_pixel_shader_uniform_texture_code(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)2668 uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2669 struct pvr_pds_pixel_shader_sa_program *restrict program,
2670 uint32_t *restrict buffer,
2671 enum pvr_pds_generate_mode gen_mode)
2672 {
2673 uint32_t *instruction;
2674 uint32_t code_size = 0;
2675 uint32_t data_size = 0;
2676 uint32_t temps_used = 0;
2677 uint32_t next_constant;
2678
2679 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2680 0);
2681
2682 assert((gen_mode == PDS_GENERATE_CODE_SEGMENT && buffer) ||
2683 gen_mode == PDS_GENERATE_SIZES);
2684
2685 /* clang-format off */
2686 /* Shape of code segment (note: clear is different)
2687 *
2688 * Code
2689 * +------------+
2690 * | BRA if0 |
2691 * | DOUTD |
2692 * | ... |
2693 * | DOUTD.halt |
2694 * | uniform |
2695 * | DOUTD |
2696 * | ... |
2697 * | ... |
2698 * | DOUTW |
2699 * | ... |
2700 * | ... |
2701 * | DOUTU.halt |
2702 * | HALT |
2703 * +------------+
2704 */
2705 /* clang-format on */
2706 instruction = buffer;
2707
2708 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2709
2710 /* The clear color can arrive packed in the right form in the first (or
2711 * first 2) dwords of the shared registers and the program will issue a
2712 * single doutw for this.
2713 */
2714 if (program->clear && program->packed_clear) {
2715 uint32_t color_constant1 =
2716 pvr_pds_get_constants(&next_constant, 2, &data_size);
2717
2718 uint32_t control_word_constant1 =
2719 pvr_pds_get_constants(&next_constant, 2, &data_size);
2720
2721 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2722 /* DOUTW the clear color to the USC constants. Predicate with
2723 * uniform loading flag (IF0).
2724 */
2725 *instruction++ = pvr_pds_encode_doutw64(
2726 /* cc */ 1, /* Only for uniform loading program. */
2727 /* END */ program->kick_usc ? 0 : 1, /* Last
2728 * instruction
2729 * for a clear.
2730 */
2731 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2732 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2733
2734 code_size += 1;
2735 }
2736 } else if (program->clear) {
2737 uint32_t color_constant1, color_constant2;
2738
2739 if (program->clear_color_dest_reg & 0x1) {
2740 uint32_t color_constant3, control_word_constant1,
2741 control_word_constant2, color_constant4;
2742
2743 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2744 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2745 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2746
2747 control_word_constant1 =
2748 pvr_pds_get_constants(&next_constant, 2, &data_size);
2749 control_word_constant2 =
2750 pvr_pds_get_constants(&next_constant, 2, &data_size);
2751 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2752
2753 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2754 /* DOUTW the clear color to the USSE constants. Predicate with
2755 * uniform loading flag (IF0).
2756 */
2757 *instruction++ = pvr_pds_encode_doutw64(
2758 /* cc */ 1, /* Only for Uniform Loading program */
2759 /* END */ 0,
2760 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2761 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2762
2763 *instruction++ = pvr_pds_encode_doutw64(
2764 /* cc */ 1, /* Only for Uniform Loading program */
2765 /* END */ 0,
2766 /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2767 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2768
2769 *instruction++ = pvr_pds_encode_doutw64(
2770 /* cc */ 1, /* Only for uniform loading program */
2771 /* END */ program->kick_usc ? 0 : 1, /* Last
2772 * instruction
2773 * for a clear.
2774 */
2775 /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2776 /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2777 }
2778
2779 code_size += 3;
2780 } else {
2781 uint32_t control_word_constant, control_word_last_constant;
2782
2783 /* Put the clear color and control words into the first 8
2784 * constants.
2785 */
2786 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2787 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2788 control_word_constant =
2789 pvr_pds_get_constants(&next_constant, 2, &data_size);
2790 control_word_last_constant =
2791 pvr_pds_get_constants(&next_constant, 2, &data_size);
2792
2793 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2794 /* DOUTW the clear color to the USSE constants. Predicate with
2795 * uniform loading flag (IF0).
2796 */
2797 *instruction++ = pvr_pds_encode_doutw64(
2798 /* cc */ 1, /* Only for Uniform Loading program */
2799 /* END */ 0,
2800 /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2801 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2802
2803 *instruction++ = pvr_pds_encode_doutw64(
2804 /* cc */ 1, /* Only for uniform loading program */
2805 /* END */ program->kick_usc ? 0 : 1, /* Last
2806 * instruction
2807 * for a clear.
2808 */
2809 /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2810 */
2811 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2812 }
2813
2814 code_size += 2;
2815 }
2816
2817 if (program->kick_usc) {
2818 uint32_t doutu_constant64;
2819
2820 doutu_constant64 =
2821 pvr_pds_get_constants(&next_constant, 2, &data_size);
2822
2823 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2824 /* Issue the task to the USC.
2825 *
2826 * dout ds1[constant_use], ds0[constant_use],
2827 * ds1[constant_use], emit
2828 */
2829 *instruction++ = pvr_pds_encode_doutu(
2830 /* cc */ 0,
2831 /* END */ 1,
2832 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2833 */
2834 }
2835
2836 code_size += 1;
2837 }
2838
2839 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2840 /* End the program. */
2841 *instruction++ = pvr_pds_inst_encode_halt(0);
2842 }
2843 code_size += 1;
2844 } else {
2845 uint32_t total_num_doutw =
2846 program->num_dword_doutw + program->num_q_word_doutw;
2847 bool both_textures_and_uniforms =
2848 ((program->num_texture_dma_kicks > 0) &&
2849 ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2850 program->kick_usc));
2851 uint32_t doutu_constant64 = 0;
2852
2853 if (both_textures_and_uniforms) {
2854 /* If the size of a PDS data section is 0, the hardware won't run
2855 * it. We therefore don't need to branch when there is only a
2856 * texture OR a uniform update program.
2857 */
2858 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2859 uint32_t branch_address =
2860 MAX2(1 + program->num_texture_dma_kicks, 2);
2861
2862 /* Use If0 to BRAnch to uniform code. */
2863 *instruction++ = pvr_pds_encode_bra(
2864 /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2865 /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2866 /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2867 /* ADDR */ branch_address);
2868 }
2869
2870 code_size += 1;
2871 }
2872
2873 if (program->num_texture_dma_kicks > 0) {
2874 uint32_t dma_address_constant64;
2875 uint32_t dma_control_constant32;
2876 /* Allocate 3 constant spaces for each kick. The 64-bit constants
2877 * come first followed by the 32-bit constants.
2878 */
2879 dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2880 dma_control_constant32 =
2881 dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2882
2883 for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2884 code_size += 1;
2885 if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2886 continue;
2887
2888 /* DMA the state into the secondary attributes. */
2889 *instruction++ = pvr_pds_encode_doutd(
2890 /* cc */ 0,
2891 /* END */ dma == (program->num_texture_dma_kicks - 1),
2892 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2893 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2894 * 64-bit
2895 * Src0
2896 */
2897 dma_address_constant64 += 2;
2898 dma_control_constant32 += 1;
2899 }
2900 } else if (both_textures_and_uniforms) {
2901 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2902 /* End the program. */
2903 *instruction++ = pvr_pds_inst_encode_halt(0);
2904 }
2905
2906 code_size += 1;
2907 }
2908
2909 /* Reserve space at the beginning of the data segment for the DOUTU Task
2910 * Control if one is needed.
2911 */
2912 if (program->kick_usc) {
2913 doutu_constant64 =
2914 pvr_pds_get_constants(&next_constant, 2, &data_size);
2915 }
2916
2917 /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2918 * 64-bit constants come first followed by the 32-bit constants.
2919 */
2920 uint32_t total_size_dma =
2921 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2922
2923 uint32_t dma_address_constant64 = pvr_pds_get_constants(
2924 &next_constant,
2925 program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2926 &data_size);
2927 uint32_t doutw_value_constant64 =
2928 dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2929 uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2930 uint32_t doutw_control_constant32 =
2931 dma_control_constant32 + program->num_uniform_dma_kicks;
2932
2933 if (total_num_doutw > 0) {
2934 pvr_pds_get_constants(&next_constant, 0, &data_size);
2935
2936 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2937 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2938 /* Set the coefficient register to data value. */
2939 *instruction++ = pvr_pds_encode_doutw64(
2940 /* cc */ 0,
2941 /* END */ !program->num_uniform_dma_kicks &&
2942 !program->kick_usc && (i == total_num_doutw - 1),
2943 /* SRC1 */ doutw_control_constant32,
2944 /* SRC0 */ doutw_value_constant64 >> 1);
2945
2946 doutw_value_constant64 += 2;
2947 doutw_control_constant32 += 1;
2948 }
2949
2950 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2951 /* Set the coefficient register to data value. */
2952 *instruction++ = pvr_pds_encode_doutw64(
2953 /* cc */ 0,
2954 /* END */ !program->num_uniform_dma_kicks &&
2955 !program->kick_usc && (i == program->num_dword_doutw - 1),
2956 /* SRC1 */ doutw_control_constant32,
2957 /* SRC0 */ doutw_value_constant64 >> 1);
2958
2959 doutw_value_constant64 += 1;
2960 doutw_control_constant32 += 1;
2961 }
2962 }
2963 code_size += total_num_doutw;
2964 }
2965
2966 if (program->num_uniform_dma_kicks > 0) {
2967 for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2968 code_size += 1;
2969
2970 if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2971 continue;
2972
2973 bool last_instruction = false;
2974 if (!program->kick_usc &&
2975 (dma == program->num_uniform_dma_kicks - 1)) {
2976 last_instruction = true;
2977 }
2978 /* DMA the state into the secondary attributes. */
2979 *instruction++ = pvr_pds_encode_doutd(
2980 /* cc */ 0,
2981 /* END */ last_instruction,
2982 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2983 */
2984 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2985 * 64-bit
2986 * Src0
2987 */
2988 dma_address_constant64 += 2;
2989 dma_control_constant32 += 1;
2990 }
2991 }
2992
2993 if (program->kick_usc) {
2994 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2995 /* Issue the task to the USC.
2996 *
2997 * dout ds1[constant_use], ds0[constant_use],
2998 * ds1[constant_use], emit
2999 */
3000
3001 *instruction++ = pvr_pds_encode_doutu(
3002 /* cc */ 0,
3003 /* END */ 1,
3004 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
3005 }
3006
3007 code_size += 1;
3008 } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
3009 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3010 /* End the program. */
3011 *instruction++ = pvr_pds_inst_encode_halt(0);
3012 }
3013
3014 code_size += 1;
3015 }
3016 }
3017
3018 /* Minimum temp count is 1. */
3019 program->temps_used = MAX2(temps_used, 1);
3020 program->code_size = code_size;
3021
3022 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3023 return instruction;
3024 else
3025 return NULL;
3026 }
3027
3028 /**
3029 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
3030 * program.
3031 *
3032 * \param program Pointer to the PDS pixel shader secondary attributes program.
3033 * \param buffer Pointer to the buffer for the code/data.
3034 * \param gen_mode Either code or data can be generated or sizes only updated.
3035 * \param dev_info PVR device information struct.
3036 * \returns Pointer to just beyond the buffer for the program/data.
3037 */
pvr_pds_pixel_shader_uniform_texture_data(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,bool uniform,const struct pvr_device_info * dev_info)3038 uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
3039 struct pvr_pds_pixel_shader_sa_program *restrict program,
3040 uint32_t *restrict buffer,
3041 enum pvr_pds_generate_mode gen_mode,
3042 bool uniform,
3043 const struct pvr_device_info *dev_info)
3044 {
3045 uint32_t *constants = buffer;
3046 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3047 uint32_t temps_used = 0;
3048 uint32_t data_size = 0;
3049
3050 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
3051 0);
3052
3053 assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
3054
3055 /* Shape of data segment (note: clear is different).
3056 *
3057 * Uniform Texture
3058 * +--------------+ +-------------+
3059 * | USC Task L | | USC Task L |
3060 * | H | | H |
3061 * | DMA1 Src0 L | | DMA1 Src0 L |
3062 * | H | | H |
3063 * | DMA2 Src0 L | | |
3064 * | H | | |
3065 * | DMA1 Src1 | | DMA1 Src1 |
3066 * | DMA2 Src1 | | |
3067 * | DOUTW0 Src1 | | |
3068 * | DOUTW1 Src1 | | |
3069 * | ... | | |
3070 * | DOUTWn Srcn | | |
3071 * | other data | | |
3072 * +--------------+ +-------------+
3073 */
3074
3075 /* Generate the PDS pixel shader secondary attributes data.
3076 *
3077 * Packed Clear
3078 * The clear color can arrive packed in the right form in the first (or
3079 * first 2) dwords of the shared registers and the program will issue a
3080 * single DOUTW for this.
3081 */
3082 if (program->clear && uniform && program->packed_clear) {
3083 uint32_t color_constant1 =
3084 pvr_pds_get_constants(&next_constant, 2, &data_size);
3085
3086 uint32_t control_word_constant1 =
3087 pvr_pds_get_constants(&next_constant, 2, &data_size);
3088
3089 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3090 uint32_t doutw;
3091
3092 pvr_pds_write_constant64(constants,
3093 color_constant1,
3094 program->clear_color[0],
3095 program->clear_color[1]);
3096
3097 /* Load into first constant in common store. */
3098 doutw = pvr_pds_encode_doutw_src1(
3099 program->clear_color_dest_reg,
3100 PVR_PDS_DOUTW_LOWER64,
3101 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3102 false,
3103 dev_info);
3104
3105 /* Set the last flag. */
3106 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3107 pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
3108 }
3109 } else if (program->clear && uniform) {
3110 uint32_t color_constant1, color_constant2;
3111
3112 if (program->clear_color_dest_reg & 0x1) {
3113 uint32_t color_constant3, control_word_constant1,
3114 control_word_constant2, color_constant4;
3115
3116 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3117 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3118 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3119
3120 control_word_constant1 =
3121 pvr_pds_get_constants(&next_constant, 2, &data_size);
3122 control_word_constant2 =
3123 pvr_pds_get_constants(&next_constant, 2, &data_size);
3124 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3125
3126 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3127 uint32_t doutw;
3128
3129 pvr_pds_write_constant32(constants,
3130 color_constant1,
3131 program->clear_color[0]);
3132
3133 pvr_pds_write_constant64(constants,
3134 color_constant2,
3135 program->clear_color[1],
3136 program->clear_color[2]);
3137
3138 pvr_pds_write_constant32(constants,
3139 color_constant3,
3140 program->clear_color[3]);
3141
3142 /* Load into first constant in common store. */
3143 doutw = pvr_pds_encode_doutw_src1(
3144 program->clear_color_dest_reg,
3145 PVR_PDS_DOUTW_LOWER32,
3146 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3147 false,
3148 dev_info);
3149
3150 pvr_pds_write_constant64(constants,
3151 control_word_constant1,
3152 doutw,
3153 0);
3154
3155 /* Move the destination register along. */
3156 doutw = pvr_pds_encode_doutw_src1(
3157 program->clear_color_dest_reg + 1,
3158 PVR_PDS_DOUTW_LOWER64,
3159 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3160 false,
3161 dev_info);
3162
3163 pvr_pds_write_constant64(constants,
3164 control_word_constant2,
3165 doutw,
3166 0);
3167
3168 /* Move the destination register along. */
3169 doutw = pvr_pds_encode_doutw_src1(
3170 program->clear_color_dest_reg + 3,
3171 PVR_PDS_DOUTW_LOWER32,
3172 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3173 false,
3174 dev_info);
3175
3176 /* Set the last flag. */
3177 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3178 pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
3179 }
3180 } else {
3181 uint32_t control_word_constant, control_word_last_constant;
3182
3183 /* Put the clear color and control words into the first 8
3184 * constants.
3185 */
3186 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3187 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3188 control_word_constant =
3189 pvr_pds_get_constants(&next_constant, 2, &data_size);
3190 control_word_last_constant =
3191 pvr_pds_get_constants(&next_constant, 2, &data_size);
3192
3193 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3194 uint32_t doutw;
3195 pvr_pds_write_constant64(constants,
3196 color_constant1,
3197 program->clear_color[0],
3198 program->clear_color[1]);
3199
3200 pvr_pds_write_constant64(constants,
3201 color_constant2,
3202 program->clear_color[2],
3203 program->clear_color[3]);
3204
3205 /* Load into first constant in common store. */
3206 doutw = pvr_pds_encode_doutw_src1(
3207 program->clear_color_dest_reg,
3208 PVR_PDS_DOUTW_LOWER64,
3209 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3210 false,
3211 dev_info);
3212
3213 pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
3214
3215 /* Move the destination register along. */
3216 doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
3217 doutw |= (program->clear_color_dest_reg + 2)
3218 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
3219
3220 /* Set the last flag. */
3221 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3222 pvr_pds_write_constant64(constants,
3223 control_word_last_constant,
3224 doutw,
3225 0);
3226 }
3227 }
3228
3229 /* Constants for the DOUTU Task Control, if needed. */
3230 if (program->kick_usc) {
3231 uint32_t doutu_constant64 =
3232 pvr_pds_get_constants(&next_constant, 2, &data_size);
3233
3234 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3235 pvr_pds_write_wide_constant(
3236 constants,
3237 doutu_constant64,
3238 program->usc_task_control.src0); /* 64-bit
3239 */
3240 /* Src0 */
3241 }
3242 }
3243 } else {
3244 if (uniform) {
3245 /* Reserve space at the beginning of the data segment for the DOUTU
3246 * Task Control if one is needed.
3247 */
3248 if (program->kick_usc) {
3249 uint32_t doutu_constant64 =
3250 pvr_pds_get_constants(&next_constant, 2, &data_size);
3251
3252 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3253 pvr_pds_write_wide_constant(
3254 constants,
3255 doutu_constant64,
3256 program->usc_task_control.src0); /* 64-bit Src0 */
3257 }
3258 }
3259
3260 uint32_t total_num_doutw =
3261 program->num_dword_doutw + program->num_q_word_doutw;
3262 uint32_t total_size_dma =
3263 program->num_dword_doutw + 2 * program->num_q_word_doutw;
3264
3265 /* Allocate 3 constant spaces for each kick. The 64-bit constants
3266 * come first followed by the 32-bit constants.
3267 */
3268 uint32_t dma_address_constant64 =
3269 pvr_pds_get_constants(&next_constant,
3270 program->num_uniform_dma_kicks * 3 +
3271 total_size_dma + total_num_doutw,
3272 &data_size);
3273 uint32_t doutw_value_constant64 =
3274 dma_address_constant64 + program->num_uniform_dma_kicks * 2;
3275 uint32_t dma_control_constant32 =
3276 doutw_value_constant64 + total_size_dma;
3277 uint32_t doutw_control_constant32 =
3278 dma_control_constant32 + program->num_uniform_dma_kicks;
3279
3280 if (total_num_doutw > 0) {
3281 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3282 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
3283 pvr_pds_write_constant64(
3284 constants,
3285 doutw_value_constant64,
3286 program->q_word_doutw_value[2 * i],
3287 program->q_word_doutw_value[2 * i + 1]);
3288 pvr_pds_write_constant32(
3289 constants,
3290 doutw_control_constant32,
3291 program->q_word_doutw_control[i] |
3292 ((!program->num_uniform_dma_kicks &&
3293 i == total_num_doutw - 1)
3294 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3295 : 0));
3296
3297 doutw_value_constant64 += 2;
3298 doutw_control_constant32 += 1;
3299 }
3300
3301 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
3302 pvr_pds_write_constant32(constants,
3303 doutw_value_constant64,
3304 program->dword_doutw_value[i]);
3305 pvr_pds_write_constant32(
3306 constants,
3307 doutw_control_constant32,
3308 program->dword_doutw_control[i] |
3309 ((!program->num_uniform_dma_kicks &&
3310 i == program->num_dword_doutw - 1)
3311 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3312 : 0));
3313
3314 doutw_value_constant64 += 1;
3315 doutw_control_constant32 += 1;
3316 }
3317 }
3318 }
3319
3320 if (program->num_uniform_dma_kicks > 0) {
3321 uint32_t kick;
3322
3323 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3324 for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
3325 kick++) {
3326 /* Copy the dma control words to constants. */
3327 pvr_pds_write_dma_address(constants,
3328 dma_address_constant64,
3329 program->uniform_dma_address[kick],
3330 false,
3331 dev_info);
3332 pvr_pds_write_constant32(constants,
3333 dma_control_constant32,
3334 program->uniform_dma_control[kick]);
3335
3336 dma_address_constant64 += 2;
3337 dma_control_constant32 += 1;
3338 }
3339
3340 pvr_pds_write_dma_address(constants,
3341 dma_address_constant64,
3342 program->uniform_dma_address[kick],
3343 false,
3344 dev_info);
3345 pvr_pds_write_constant32(
3346 constants,
3347 dma_control_constant32,
3348 program->uniform_dma_control[kick] |
3349 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3350 }
3351 }
3352
3353 } else if (program->num_texture_dma_kicks > 0) {
3354 /* Allocate 3 constant spaces for each kick. The 64-bit constants
3355 * come first followed by the 32-bit constants.
3356 */
3357 uint32_t dma_address_constant64 =
3358 pvr_pds_get_constants(&next_constant,
3359 program->num_texture_dma_kicks * 3,
3360 &data_size);
3361 uint32_t dma_control_constant32 =
3362 dma_address_constant64 + (program->num_texture_dma_kicks * 2);
3363
3364 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3365 uint32_t kick;
3366 for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
3367 /* Copy the DMA control words to constants. */
3368 pvr_pds_write_dma_address(constants,
3369 dma_address_constant64,
3370 program->texture_dma_address[kick],
3371 false,
3372 dev_info);
3373
3374 pvr_pds_write_constant32(constants,
3375 dma_control_constant32,
3376 program->texture_dma_control[kick]);
3377
3378 dma_address_constant64 += 2;
3379 dma_control_constant32 += 1;
3380 }
3381
3382 pvr_pds_write_dma_address(constants,
3383 dma_address_constant64,
3384 program->texture_dma_address[kick],
3385 false,
3386 dev_info);
3387
3388 pvr_pds_write_constant32(
3389 constants,
3390 dma_control_constant32,
3391 program->texture_dma_control[kick] |
3392 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3393 }
3394 }
3395 }
3396
3397 /* Save the data segment pointer and size. */
3398 program->data_segment = constants;
3399
3400 /* Minimum temp count is 1. */
3401 program->temps_used = MAX2(temps_used, 1);
3402 program->data_size = data_size;
3403
3404 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3405 return (constants + next_constant);
3406 else
3407 return NULL;
3408 }
3409
3410 /**
3411 * Generates generic DOUTC PDS program.
3412 *
3413 * \param program Pointer to the PDS kick USC.
3414 * \param buffer Pointer to the buffer for the program.
3415 * \param gen_mode Either code and data can be generated, or sizes only updated.
3416 * \returns Pointer to just beyond the buffer for the code or program segment.
3417 */
pvr_pds_generate_doutc(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3418 uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3419 uint32_t *restrict buffer,
3420 enum pvr_pds_generate_mode gen_mode)
3421 {
3422 uint32_t constant = 0;
3423
3424 /* Automatically get a data size of 1x 128bit chunks. */
3425 uint32_t data_size = 0, code_size = 0;
3426
3427 /* Setup the data part. */
3428 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3429 uint32_t *instruction = buffer;
3430 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3431 * dwords.
3432 */
3433
3434 /* Update the program sizes. */
3435 program->data_size = data_size;
3436 program->code_size = code_size;
3437 program->data_segment = constants;
3438
3439 if (gen_mode == PDS_GENERATE_SIZES)
3440 return NULL;
3441
3442 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3443 /* Copy the USC task control words to constants. */
3444
3445 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3446 pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3447 * Src0
3448 */
3449
3450 uint32_t control_word_constant =
3451 pvr_pds_get_constants(&next_constant, 2, &data_size);
3452 pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3453 * Src1
3454 */
3455
3456 program->data_size = data_size;
3457 buffer += data_size;
3458
3459 return buffer;
3460 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3461 *instruction++ = pvr_pds_inst_encode_doutc(
3462 /* cc */ 0,
3463 /* END */ 0);
3464
3465 code_size++;
3466
3467 /* End the program. */
3468 *instruction++ = pvr_pds_inst_encode_halt(0);
3469 code_size++;
3470
3471 program->code_size = code_size;
3472 }
3473
3474 return instruction;
3475 }
3476
3477 /**
3478 * Generates generic kick DOUTU PDS program in a single data+code block.
3479 *
3480 * \param control Pointer to the PDS kick USC.
3481 * \param buffer Pointer to the buffer for the program.
3482 * \param gen_mode Either code and data can be generated or sizes only updated.
3483 * \param dev_info PVR device information structure.
3484 * \returns Pointer to just beyond the buffer for the code or program segment.
3485 */
pvr_pds_generate_doutw(struct pvr_pds_doutw_control * restrict control,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3486 uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3487 uint32_t *restrict buffer,
3488 enum pvr_pds_generate_mode gen_mode,
3489 const struct pvr_device_info *dev_info)
3490 {
3491 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3492 uint32_t doutw;
3493 uint32_t data_size = 0, code_size = 0;
3494 uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3495 uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3496
3497 /* Assert if buffer is exceeded. */
3498 assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3499
3500 uint32_t *constants = buffer;
3501 uint32_t *instruction = buffer;
3502
3503 /* Put the constants and control words interleaved in the data region. */
3504 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3505 const_pair++) {
3506 constant[const_pair] =
3507 pvr_pds_get_constants(&next_constant, 2, &data_size);
3508 control_word_constant[const_pair] =
3509 pvr_pds_get_constants(&next_constant, 2, &data_size);
3510 }
3511
3512 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3513 /* Data segment points to start of constants. */
3514 control->data_segment = constants;
3515
3516 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3517 const_pair++) {
3518 pvr_pds_write_constant64(constants,
3519 constant[const_pair],
3520 H32(control->doutw_data[const_pair]),
3521 L32(control->doutw_data[const_pair]));
3522
3523 /* Start loading at offset 0. */
3524 if (control->dest_store == PDS_COMMON_STORE) {
3525 doutw = pvr_pds_encode_doutw_src1(
3526 (2 * const_pair),
3527 PVR_PDS_DOUTW_LOWER64,
3528 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3529 false,
3530 dev_info);
3531 } else {
3532 doutw = pvr_pds_encode_doutw_src1(
3533 (2 * const_pair),
3534 PVR_PDS_DOUTW_LOWER64,
3535 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3536 false,
3537 dev_info);
3538 }
3539
3540 if (const_pair + 1 == control->num_const64) {
3541 /* Set the last flag for the MCU (assume there are no following
3542 * DOUTD's).
3543 */
3544 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3545 }
3546 pvr_pds_write_constant64(constants,
3547 control_word_constant[const_pair],
3548 doutw,
3549 0);
3550 }
3551
3552 control->data_size = data_size;
3553 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3554 /* Code section. */
3555
3556 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3557 const_pair++) {
3558 /* DOUTW the PDS data to the USC constants. */
3559 *instruction++ = pvr_pds_encode_doutw64(
3560 /* cc */ 0,
3561 /* END */ control->last_instruction &&
3562 (const_pair + 1 == control->num_const64),
3563 /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3564 * Src1.
3565 */
3566 /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3567
3568 code_size++;
3569 }
3570
3571 if (control->last_instruction) {
3572 /* End the program. */
3573 *instruction++ = pvr_pds_inst_encode_halt(0);
3574 code_size++;
3575 }
3576
3577 control->code_size = code_size;
3578 }
3579
3580 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3581 return (constants + next_constant);
3582 else
3583 return instruction;
3584 }
3585
3586 /**
3587 * Generates generic kick DOUTU PDS program in a single data+code block.
3588 *
3589 * \param program Pointer to the PDS kick USC.
3590 * \param buffer Pointer to the buffer for the program.
3591 * \param start_next_constant Next constant in data segment. Non-zero if another
3592 * instruction precedes the DOUTU.
3593 * \param cc_enabled If true then the DOUTU is predicated (cc set).
3594 * \param gen_mode Either code and data can be generated or sizes only updated.
3595 * \returns Pointer to just beyond the buffer for the code or program segment.
3596 */
pvr_pds_kick_usc(struct pvr_pds_kickusc_program * restrict program,uint32_t * restrict buffer,uint32_t start_next_constant,bool cc_enabled,enum pvr_pds_generate_mode gen_mode)3597 uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3598 uint32_t *restrict buffer,
3599 uint32_t start_next_constant,
3600 bool cc_enabled,
3601 enum pvr_pds_generate_mode gen_mode)
3602 {
3603 uint32_t constant = 0;
3604
3605 /* Automatically get a data size of 2 128bit chunks. */
3606 uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3607 uint32_t code_size = 1; /* Single doutu */
3608 uint32_t dummy_count = 0;
3609
3610 /* Setup the data part. */
3611 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3612 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3613 * dwords.
3614 */
3615
3616 /* Update the program sizes. */
3617 program->data_size = data_size;
3618 program->code_size = code_size;
3619 program->data_segment = constants;
3620
3621 if (gen_mode == PDS_GENERATE_SIZES)
3622 return NULL;
3623
3624 if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3625 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3626 /* Copy the USC task control words to constants. */
3627
3628 constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3629
3630 pvr_pds_write_wide_constant(constants,
3631 constant + 0,
3632 program->usc_task_control.src0); /* 64-bit
3633 * Src0.
3634 */
3635 buffer += data_size;
3636
3637 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3638 return buffer;
3639 }
3640
3641 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3642 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3643 /* Generate the PDS pixel shader code. */
3644
3645 /* Setup the instruction pointer. */
3646 uint32_t *instruction = buffer;
3647
3648 /* Issue the task to the USC.
3649 *
3650 * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3651 * halt halt
3652 */
3653
3654 *instruction++ = pvr_pds_encode_doutu(
3655 /* cc */ cc_enabled,
3656 /* END */ 1,
3657 /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3658 * 64-bit Src0
3659 */
3660
3661 /* Return pointer to just after last instruction. */
3662 return instruction;
3663 }
3664
3665 /* Execution should never reach here; keep compiler happy. */
3666 return NULL;
3667 }
3668
pvr_pds_generate_compute_barrier_conditional(uint32_t * buffer,enum pvr_pds_generate_mode gen_mode)3669 uint32_t *pvr_pds_generate_compute_barrier_conditional(
3670 uint32_t *buffer,
3671 enum pvr_pds_generate_mode gen_mode)
3672 {
3673 /* Compute barriers supported. Need to test for coeff sync task. */
3674
3675 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3676 return buffer; /* No data segment. */
3677
3678 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3679 /* Test whether this is the coefficient update task or not. */
3680 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3681 */
3682 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3683 */
3684 PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3685 */
3686 1 /* ADDR */);
3687
3688 /* Encode a HALT. */
3689 *buffer++ = pvr_pds_inst_encode_halt(1);
3690
3691 /* Reset the default predicate to IF0. */
3692 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3693 */
3694 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3695 */
3696 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3697 */
3698 1 /* ADDR */);
3699 }
3700
3701 return buffer;
3702 }
3703
3704 /**
3705 * Generates program to kick the USC task to store shared.
3706 *
3707 * \param program Pointer to the PDS shared register.
3708 * \param buffer Pointer to the buffer for the program.
3709 * \param gen_mode Either code and data can be generated or sizes only updated.
3710 * \param dev_info PVR device information structure.
3711 * \returns Pointer to just beyond the buffer for the program.
3712 */
pvr_pds_generate_shared_storing_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3713 uint32_t *pvr_pds_generate_shared_storing_program(
3714 struct pvr_pds_shared_storing_program *restrict program,
3715 uint32_t *restrict buffer,
3716 enum pvr_pds_generate_mode gen_mode,
3717 const struct pvr_device_info *dev_info)
3718 {
3719 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3720 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3721
3722 if (gen_mode == PDS_GENERATE_SIZES)
3723 return NULL;
3724
3725 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3726 uint32_t *constants = buffer;
3727
3728 constants =
3729 pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3730 program->data_size = doutw_control->data_size;
3731
3732 constants = pvr_pds_kick_usc(kick_usc_program,
3733 constants,
3734 0,
3735 program->cc_enable,
3736 gen_mode);
3737 program->data_size += kick_usc_program->data_size;
3738
3739 return constants;
3740 }
3741
3742 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3743 /* Generate PDS code segment. */
3744 uint32_t *instruction = buffer;
3745
3746 /* doutw vi1, vi0
3747 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3748 * emit
3749 */
3750 instruction =
3751 pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3752 program->code_size = doutw_control->code_size;
3753
3754 /* Offset into data segment follows on from doutw data segment. */
3755 instruction = pvr_pds_kick_usc(kick_usc_program,
3756 instruction,
3757 doutw_control->data_size,
3758 program->cc_enable,
3759 gen_mode);
3760 program->code_size += kick_usc_program->code_size;
3761
3762 return instruction;
3763 }
3764
3765 /* Execution should never reach here. */
3766 return NULL;
3767 }
3768
pvr_pds_generate_fence_terminate_program(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3769 uint32_t *pvr_pds_generate_fence_terminate_program(
3770 struct pvr_pds_fence_program *restrict program,
3771 uint32_t *restrict buffer,
3772 enum pvr_pds_generate_mode gen_mode,
3773 const struct pvr_device_info *dev_info)
3774 {
3775 uint32_t data_size = 0;
3776 uint32_t code_size = 0;
3777
3778 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3779 /* Data segment. */
3780 uint32_t *constants, *constants_base;
3781
3782 constants = constants_base = (uint32_t *)buffer;
3783
3784 /* DOUTC sources are not used, but they must be valid. */
3785 pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3786 data_size += program->data_size;
3787
3788 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3789 /* Append a 64-bit constant with value 1. Used to increment ptemp.
3790 * Return the offset into the data segment.
3791 */
3792 program->fence_constant_word =
3793 pvr_pds_append_constant64(constants_base, 1, &data_size);
3794 }
3795
3796 program->data_size = data_size;
3797 return constants;
3798 }
3799
3800 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3801 /* Code segment. */
3802 uint32_t *instruction = (uint32_t *)buffer;
3803
3804 instruction = pvr_pds_generate_compute_barrier_conditional(
3805 instruction,
3806 PDS_GENERATE_CODE_SEGMENT);
3807 code_size += 3;
3808
3809 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3810 /* lock */
3811 *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3812
3813 /* add64 pt[0], pt[0], #1 */
3814 *instruction++ = pvr_pds_inst_encode_add64(
3815 0, /* cc */
3816 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3817 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3818 PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3819 */
3820 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3821 (program->fence_constant_word >> 1), /* src1 = 1 */
3822 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3823 * ptemp[0]
3824 */
3825
3826 /* release */
3827 *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3828
3829 /* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */
3830 *instruction++ = pvr_pds_inst_encode_cmpi(
3831 0, /* cc */
3832 PVR_ROGUE_PDSINST_COP_EQ,
3833 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3834 * = ptemp[0]
3835 */
3836 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3837
3838 /* bra -1 */
3839 *instruction++ =
3840 pvr_pds_encode_bra(0, /* cc */
3841 1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3842 */
3843 0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3844 */
3845 -1); /* bra PC */
3846 code_size += 5;
3847 }
3848
3849 /* DOUTC */
3850 instruction = pvr_pds_generate_doutc(program,
3851 instruction,
3852 PDS_GENERATE_CODE_SEGMENT);
3853 code_size += program->code_size;
3854
3855 program->code_size = code_size;
3856 return instruction;
3857 }
3858
3859 /* Execution should never reach here. */
3860 return NULL;
3861 }
3862
3863 /**
3864 * Generates program to kick the USC task to load shared registers from memory.
3865 *
3866 * \param program Pointer to the PDS shared register.
3867 * \param buffer Pointer to the buffer for the program.
3868 * \param gen_mode Either code and data can be generated or sizes only updated.
3869 * \param dev_info PVR device information struct.
3870 * \returns Pointer to just beyond the buffer for the program.
3871 */
pvr_pds_generate_compute_shared_loading_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3872 uint32_t *pvr_pds_generate_compute_shared_loading_program(
3873 struct pvr_pds_shared_storing_program *restrict program,
3874 uint32_t *restrict buffer,
3875 enum pvr_pds_generate_mode gen_mode,
3876 const struct pvr_device_info *dev_info)
3877 {
3878 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3879 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3880
3881 uint32_t next_constant;
3882 uint32_t data_size = 0;
3883 uint32_t code_size = 0;
3884
3885 /* This needs to persist to the CODE_SEGMENT call. */
3886 static uint32_t fence_constant_word = 0;
3887 uint64_t zero_constant64 = 0;
3888
3889 if (gen_mode == PDS_GENERATE_SIZES)
3890 return NULL;
3891
3892 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3893 uint32_t *constants = buffer;
3894
3895 constants = pvr_pds_generate_doutw(doutw_control,
3896 constants,
3897 PDS_GENERATE_DATA_SEGMENT,
3898 dev_info);
3899 data_size += doutw_control->data_size;
3900
3901 constants = pvr_pds_kick_usc(kick_usc_program,
3902 constants,
3903 0,
3904 program->cc_enable,
3905 gen_mode);
3906 data_size += kick_usc_program->data_size;
3907
3908 /* Copy the fence constant value (64-bit). */
3909 next_constant = data_size; /* Assumes data words fully packed. */
3910 fence_constant_word =
3911 pvr_pds_get_constants(&next_constant, 2, &data_size);
3912
3913 /* Encode the fence constant src0 (offset measured from start of data
3914 * buffer). Fence barrier is initialized to zero.
3915 */
3916 pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3917 /* Update the const size. */
3918 data_size += 2;
3919 constants += 2;
3920
3921 program->data_size = data_size;
3922 return constants;
3923 }
3924
3925 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3926 /* Generate PDS code segment. */
3927 uint32_t *instruction = buffer;
3928
3929 /* add64 pt0, c0, c0
3930 * IF [2x Phantoms]
3931 * add64 pt1, c0, c0
3932 * st [constant_mem_addr], pt0, 4
3933 * ENDIF
3934 * doutw vi1, vi0
3935 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3936 * emit
3937 *
3938 * Zero the persistent temp (SW fence for context switch).
3939 */
3940 *instruction++ = pvr_pds_inst_encode_add64(
3941 0, /* cc */
3942 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3943 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3944 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3945 (fence_constant_word >> 1), /* src0
3946 * = 0
3947 */
3948 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3949 (fence_constant_word >> 1), /* src1
3950 * = 0
3951 */
3952 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3953 */
3954 code_size++;
3955
3956 instruction = pvr_pds_generate_doutw(doutw_control,
3957 instruction,
3958 PDS_GENERATE_CODE_SEGMENT,
3959 dev_info);
3960 code_size += doutw_control->code_size;
3961
3962 /* Offset into data segment follows on from doutw data segment. */
3963 instruction = pvr_pds_kick_usc(kick_usc_program,
3964 instruction,
3965 doutw_control->data_size,
3966 program->cc_enable,
3967 gen_mode);
3968 code_size += kick_usc_program->code_size;
3969
3970 program->code_size = code_size;
3971 return instruction;
3972 }
3973
3974 /* Execution should never reach here. */
3975 return NULL;
3976 }
3977
3978 /**
3979 * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3980 * Relies on num_fpu_iterators being initialized for size calculation.
3981 * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3982 * initialized for program generation.
3983 *
3984 * \param program Pointer to the PDS pixel shader program.
3985 * \param buffer Pointer to the buffer for the program.
3986 * \param gen_mode Either code and data can be generated or sizes only updated.
3987 * \returns Pointer to just beyond the buffer for the program.
3988 */
pvr_pds_coefficient_loading(struct pvr_pds_coeff_loading_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3989 uint32_t *pvr_pds_coefficient_loading(
3990 struct pvr_pds_coeff_loading_program *restrict program,
3991 uint32_t *restrict buffer,
3992 enum pvr_pds_generate_mode gen_mode)
3993 {
3994 uint32_t constant;
3995 uint32_t *instruction;
3996 uint32_t total_data_size, code_size;
3997
3998 /* Place constants at the front of the buffer. */
3999 uint32_t *constants = buffer;
4000 /* Start counting constants from 0. */
4001 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4002
4003 /* Save the data segment pointer and size. */
4004 program->data_segment = constants;
4005
4006 total_data_size = 0;
4007 code_size = 0;
4008
4009 total_data_size += 2 * program->num_fpu_iterators;
4010 code_size += program->num_fpu_iterators;
4011
4012 /* Instructions start where constants finished, but we must take note of
4013 * alignment.
4014 *
4015 * 128-bit boundary = 4 dwords.
4016 */
4017 total_data_size = ALIGN_POT(total_data_size, 4);
4018 if (gen_mode != PDS_GENERATE_SIZES) {
4019 uint32_t data_size = 0;
4020 uint32_t iterator = 0;
4021
4022 instruction = buffer + total_data_size;
4023
4024 while (iterator < program->num_fpu_iterators) {
4025 uint64_t iterator_word;
4026
4027 /* Copy the USC task control words to constants. */
4028 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4029
4030 /* Write the first iterator. */
4031 iterator_word =
4032 (uint64_t)program->FPU_iterators[iterator]
4033 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4034
4035 /* Write the destination. */
4036 iterator_word |=
4037 (uint64_t)program->destination[iterator++]
4038 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4039
4040 /* If this is the last DOUTI word the "Last Issue" bit should be
4041 * set.
4042 */
4043 if (iterator >= program->num_fpu_iterators) {
4044 iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4045 }
4046
4047 /* Write the word to the buffer. */
4048 pvr_pds_write_wide_constant(constants,
4049 constant,
4050 iterator_word); /* 64-bit
4051 Src0
4052 */
4053
4054 /* Write the DOUT instruction. */
4055 *instruction++ = pvr_pds_encode_douti(
4056 /* cc */ 0,
4057 /* END */ 0,
4058 /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4059 }
4060
4061 /* Update the last DOUTI instruction to have the END flag set. */
4062 *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4063 } else {
4064 instruction = NULL;
4065 }
4066
4067 /* Update the data size and code size. Minimum temp count is 1. */
4068 program->temps_used = 1;
4069 program->data_size = total_data_size;
4070 program->code_size = code_size;
4071
4072 return instruction;
4073 }
4074
4075 /**
4076 * Generate a single ld/st instruction. This can correspond to one or more
4077 * real ld/st instructions based on the value of count.
4078 *
4079 * \param ld true to generate load, false to generate store.
4080 * \param control Cache mode control.
4081 * \param temp_index Dest temp for load/source temp for store, in 32bits
4082 * register index.
4083 * \param address Source for load/dest for store in bytes.
4084 * \param count Number of dwords for load/store.
4085 * \param next_constant
4086 * \param total_data_size
4087 * \param total_code_size
4088 * \param buffer Pointer to the buffer for the program.
4089 * \param data_fence Issue data fence.
4090 * \param gen_mode Either code and data can be generated or sizes only updated.
4091 * \param dev_info PVR device information structure.
4092 * \returns Pointer to just beyond the buffer for the program.
4093 */
pvr_pds_generate_single_ldst_instruction(bool ld,const struct pvr_pds_ldst_control * control,uint32_t temp_index,uint64_t address,uint32_t count,uint32_t * next_constant,uint32_t * total_data_size,uint32_t * total_code_size,uint32_t * restrict buffer,bool data_fence,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4094 uint32_t *pvr_pds_generate_single_ldst_instruction(
4095 bool ld,
4096 const struct pvr_pds_ldst_control *control,
4097 uint32_t temp_index,
4098 uint64_t address,
4099 uint32_t count,
4100 uint32_t *next_constant,
4101 uint32_t *total_data_size,
4102 uint32_t *total_code_size,
4103 uint32_t *restrict buffer,
4104 bool data_fence,
4105 enum pvr_pds_generate_mode gen_mode,
4106 const struct pvr_device_info *dev_info)
4107 {
4108 /* A single ld/ST here does NOT actually correspond to a single ld/ST
4109 * instruction, but may needs multiple ld/ST instructions because each ld/ST
4110 * instruction can only ld/ST a restricted max number of dwords which may
4111 * less than count passed here.
4112 */
4113
4114 uint32_t num_inst;
4115 uint32_t constant;
4116
4117 if (ld) {
4118 /* ld must operate on 64bits unit, and it needs to load from and to 128
4119 * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4120 * 1, 2, ...) times 64bits unit.
4121 */
4122 uint32_t per_inst_count = 0;
4123 uint32_t last_inst_count;
4124
4125 assert((gen_mode == PDS_GENERATE_SIZES) ||
4126 (((count % 2) == 0) && ((address % 16) == 0) &&
4127 (temp_index % 2) == 0));
4128
4129 count >>= 1;
4130 temp_index >>= 1;
4131
4132 /* Found out how many ld instructions are needed and ld size for the all
4133 * possible ld instructions.
4134 */
4135 if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4136 num_inst = 1;
4137 last_inst_count = count;
4138 } else {
4139 per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4140 if ((per_inst_count % 2) != 0)
4141 per_inst_count -= 1;
4142
4143 num_inst = count / per_inst_count;
4144 last_inst_count = count - per_inst_count * num_inst;
4145 num_inst += 1;
4146 }
4147
4148 /* Generate all the instructions. */
4149 for (uint32_t i = 0; i < num_inst; i++) {
4150 if ((i == (num_inst - 1)) && (last_inst_count == 0))
4151 break;
4152
4153 /* A single load instruction. */
4154 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4155
4156 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4157 uint64_t ld_src0 = 0;
4158
4159 ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4160 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4161 ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4162 : per_inst_count) &
4163 PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4164 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4165 ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4166 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4167
4168 if (!control) {
4169 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4170
4171 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4172 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4173
4174 } else {
4175 ld_src0 |= control->cache_control_const;
4176 }
4177
4178 /* Write it to the constant. */
4179 pvr_pds_write_constant64(buffer,
4180 constant,
4181 (uint32_t)(ld_src0),
4182 (uint32_t)(ld_src0 >> 32));
4183
4184 /* Adjust value for next ld instruction. */
4185 temp_index += per_inst_count;
4186 address += (((uint64_t)(per_inst_count)) << 3);
4187 }
4188
4189 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4190 *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4191
4192 if (data_fence)
4193 *buffer++ = pvr_pds_inst_encode_wdf(0);
4194 }
4195 }
4196 } else {
4197 /* ST needs source memory address to be 32bits aligned. */
4198 assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4199
4200 /* Found out how many ST instructions are needed, each ST can only store
4201 * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4202 */
4203 num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4204 num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4205
4206 /* Generate all the instructions. */
4207 for (uint32_t i = 0; i < num_inst; i++) {
4208 /* A single store instruction. */
4209 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4210
4211 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4212 uint32_t per_inst_count =
4213 (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4214 ? count
4215 : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4216 uint64_t st_src0 = 0;
4217
4218 st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4219 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4220 st_src0 |=
4221 (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4222 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4223 st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4224 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4225
4226 if (!control) {
4227 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4228
4229 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4230 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4231 }
4232
4233 } else {
4234 st_src0 |= control->cache_control_const;
4235 }
4236
4237 /* Write it to the constant. */
4238 pvr_pds_write_constant64(buffer,
4239 constant,
4240 (uint32_t)(st_src0),
4241 (uint32_t)(st_src0 >> 32));
4242
4243 /* Adjust value for next ST instruction. */
4244 temp_index += per_inst_count;
4245 count -= per_inst_count;
4246 address += (((uint64_t)(per_inst_count)) << 2);
4247 }
4248
4249 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4250 *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4251
4252 if (data_fence)
4253 *buffer++ = pvr_pds_inst_encode_wdf(0);
4254 }
4255 }
4256 }
4257
4258 (*total_code_size) += num_inst;
4259 if (data_fence)
4260 (*total_code_size) += num_inst;
4261
4262 if (gen_mode != PDS_GENERATE_SIZES)
4263 return buffer;
4264 return NULL;
4265 }
4266
4267 /**
4268 * Generate programs used to prepare stream out, i.e., clear stream out buffer
4269 * overflow flags and update Persistent temps by a ld instruction.
4270 *
4271 * This must be used in PPP state update.
4272 *
4273 * \param program Pointer to the stream out program.
4274 * \param buffer Pointer to the buffer for the program.
4275 * \param store_mode If true then the data is stored to memory. If false then
4276 * the data is loaded from memory.
4277 * \param gen_mode Either code and data can be generated or sizes only updated.
4278 * \param dev_info PVR device information structure.
4279 * \returns Pointer to just beyond the buffer for the program.
4280 */
pvr_pds_generate_stream_out_init_program(struct pvr_pds_stream_out_init_program * restrict program,uint32_t * restrict buffer,bool store_mode,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4281 uint32_t *pvr_pds_generate_stream_out_init_program(
4282 struct pvr_pds_stream_out_init_program *restrict program,
4283 uint32_t *restrict buffer,
4284 bool store_mode,
4285 enum pvr_pds_generate_mode gen_mode,
4286 const struct pvr_device_info *dev_info)
4287 {
4288 uint32_t total_data_size = 0;
4289 uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4290
4291 /* Start counting constants from 0. */
4292 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4293
4294 uint32_t total_code_size = 1;
4295
4296 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4297 /* We only need to clear global stream out predicate, other predicates
4298 * are not used during the stream out buffer overflow test.
4299 */
4300 *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4301 }
4302
4303 for (uint32_t index = 0; index < program->num_buffers; index++) {
4304 if (program->dev_address_for_buffer_data[index] != 0) {
4305 /* Generate load/store program to load/store persistent temps. */
4306
4307 /* NOTE: store_mode == true case should be handled by
4308 * StreamOutTerminate.
4309 */
4310 buffer = pvr_pds_generate_single_ldst_instruction(
4311 !store_mode,
4312 NULL,
4313 PTDst,
4314 program->dev_address_for_buffer_data[index],
4315 program->pds_buffer_data_size[index],
4316 &next_constant,
4317 &total_data_size,
4318 &total_code_size,
4319 buffer,
4320 false,
4321 gen_mode,
4322 dev_info);
4323 }
4324
4325 PTDst += program->pds_buffer_data_size[index];
4326 }
4327
4328 total_code_size += 2;
4329
4330 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4331 /* We need to fence the loading. */
4332 *buffer++ = pvr_pds_inst_encode_wdf(0);
4333 *buffer++ = pvr_pds_inst_encode_halt(0);
4334 }
4335
4336 /* Save size information to program */
4337 program->stream_out_init_pds_data_size =
4338 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4339 /* PDS program code size. */
4340 program->stream_out_init_pds_code_size = total_code_size;
4341
4342 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4343 return buffer + program->stream_out_init_pds_data_size;
4344 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4345 return buffer;
4346
4347 return NULL;
4348 }
4349
4350 /**
4351 * Generate stream out terminate program for stream out.
4352 *
4353 * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4354 * will be stored.
4355 *
4356 * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4357 * will be stored into memory.
4358 *
4359 * The stream out terminate program is used to update the PPP state and the data
4360 * and code section cannot be separate.
4361 *
4362 * \param program Pointer to the stream out program.
4363 * \param buffer Pointer to the buffer for the program.
4364 * \param gen_mode Either code and data can be generated or sizes only updated.
4365 * \param dev_info PVR device info structure.
4366 * \returns Pointer to just beyond the buffer for the program.
4367 */
pvr_pds_generate_stream_out_terminate_program(struct pvr_pds_stream_out_terminate_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4368 uint32_t *pvr_pds_generate_stream_out_terminate_program(
4369 struct pvr_pds_stream_out_terminate_program *restrict program,
4370 uint32_t *restrict buffer,
4371 enum pvr_pds_generate_mode gen_mode,
4372 const struct pvr_device_info *dev_info)
4373 {
4374 uint32_t next_constant;
4375 uint32_t total_data_size = 0, total_code_size = 0;
4376
4377 /* Start counting constants from 0. */
4378 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4379
4380 /* Generate store program to store persistent temps. */
4381 buffer = pvr_pds_generate_single_ldst_instruction(
4382 false,
4383 NULL,
4384 PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4385 program->dev_address_for_storing_persistent_temp,
4386 program->pds_persistent_temp_size_to_store,
4387 &next_constant,
4388 &total_data_size,
4389 &total_code_size,
4390 buffer,
4391 false,
4392 gen_mode,
4393 dev_info);
4394
4395 total_code_size += 2;
4396 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4397 *buffer++ = pvr_pds_inst_encode_wdf(0);
4398 *buffer++ = pvr_pds_inst_encode_halt(0);
4399 }
4400
4401 /* Save size information to program. */
4402 program->stream_out_terminate_pds_data_size =
4403 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4404 /* PDS program code size. */
4405 program->stream_out_terminate_pds_code_size = total_code_size;
4406
4407 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4408 return buffer + program->stream_out_terminate_pds_data_size;
4409 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4410 return buffer;
4411
4412 return NULL;
4413 }
4414
4415 /* DrawArrays works in several steps:
4416 *
4417 * 1) load data from draw_indirect buffer
4418 * 2) tweak data to match hardware formats
4419 * 3) write data to indexblock
4420 * 4) signal the VDM to continue
4421 *
4422 * This is complicated by HW limitations on alignment, as well as a HWBRN.
4423 *
4424 * 1) Load data.
4425 * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4426 * spec we must deal with this by choosing an appropriate earlier address and
4427 * loading enough dwords that we load the entirety of the buffer.
4428 *
4429 * if addr & 0xf:
4430 * load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 *    data = tmp[0 + (addr & 0xf) >> 2]...
4432 * else
4433 * load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4434 * data = tmp[0]...
4435 *
4436 *
4437 * 2) Tweak data.
4438 * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4439 * the VDM control stream. We must subtract 1 from the loaded primCount.
4440 *
4441 * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4442 * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4443 * into another tmp that has the correct alignment. Note: this is only required
4444 * when data = tmp[even], as primCount is data+1:
4445 *
4446 * if data = tmp[even]:
4447 * primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4448 * else:
4449 * primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4450 *
4451 * This boils down to:
4452 *
4453 * primCount = data[1]
4454 * primCountSrc = data[1]
4455 * if brn_present && (data is even):
4456 * mov scratch, primCount
4457 * primCountSrc = scratch
4458 * endif
4459 * sub primCount, primCountSrc, 1
4460 *
4461 * 3) Store Data.
4462 * Write the now-tweaked data over the top of the indexblock.
4463 * To ensure the write completes before the VDM re-reads the data, we must cause
4464 * a data hazard by doing a dummy (dummy meaning we don't care about the
4465 * returned data) load from the same addresses. Again, because the ld must
4466 * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4467 * index block is 128-bit aligned. This is the client driver's responsibility.
4468 *
4469 * st data[0, 1, 2] -> (idxblock + 4)
4470 * load [idxblock] 4 dwords
4471 *
4472 * 4) Signal the VDM
4473 * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4474 * where it is currently fenced on a dummy idxblock that has been inserted by
4475 * the driver.
4476 */
4477
4478 #include "pvr_draw_indirect_arrays0.h"
4479 #include "pvr_draw_indirect_arrays1.h"
4480 #include "pvr_draw_indirect_arrays2.h"
4481 #include "pvr_draw_indirect_arrays3.h"
4482
4483 #include "pvr_draw_indirect_arrays_base_instance0.h"
4484 #include "pvr_draw_indirect_arrays_base_instance1.h"
4485 #include "pvr_draw_indirect_arrays_base_instance2.h"
4486 #include "pvr_draw_indirect_arrays_base_instance3.h"
4487
4488 #include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4489 #include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4490 #include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4491 #include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4492
/* Selects the SLC mode bits for an LD source word: bypass when the device
 * lacks the slc_mcu_cache_controls feature, cached otherwise.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)         \
   (!(device)->features.has_slc_mcu_cache_controls    \
       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS  \
       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED)
4497
pvr_pds_generate_draw_arrays_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4498 void pvr_pds_generate_draw_arrays_indirect(
4499 struct pvr_pds_drawindirect_program *restrict program,
4500 uint32_t *restrict buffer,
4501 enum pvr_pds_generate_mode gen_mode,
4502 const struct pvr_device_info *dev_info)
4503 {
4504 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4505 (gen_mode == PDS_GENERATE_SIZES)) {
4506 const struct pvr_psc_program_output *psc_program = NULL;
4507 switch ((program->arg_buffer >> 2) % 4) {
4508 case 0:
4509 if (program->support_base_instance) {
4510 if (program->increment_draw_id) {
4511 psc_program =
4512 &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4513 } else {
4514 psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4515 }
4516 } else {
4517 psc_program = &pvr_draw_indirect_arrays0_program;
4518 }
4519 break;
4520 case 1:
4521 if (program->support_base_instance) {
4522 if (program->increment_draw_id) {
4523 psc_program =
4524 &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4525 } else {
4526 psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4527 }
4528 } else {
4529 psc_program = &pvr_draw_indirect_arrays1_program;
4530 }
4531 break;
4532 case 2:
4533 if (program->support_base_instance) {
4534 if (program->increment_draw_id) {
4535 psc_program =
4536 &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4537 } else {
4538 psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4539 }
4540 } else {
4541 psc_program = &pvr_draw_indirect_arrays2_program;
4542 }
4543 break;
4544 case 3:
4545 if (program->support_base_instance) {
4546 if (program->increment_draw_id) {
4547 psc_program =
4548 &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4549 } else {
4550 psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4551 }
4552 } else {
4553 psc_program = &pvr_draw_indirect_arrays3_program;
4554 }
4555 break;
4556 }
4557
4558 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4559 memcpy(buffer,
4560 psc_program->code,
4561 psc_program->code_size * sizeof(uint32_t));
4562 #if defined(DUMP_PDS)
4563 for (uint32_t i = 0; i < psc_program->code_size; i++)
4564 PVR_PDS_PRINT_INST(buffer[i]);
4565 #endif
4566 }
4567
4568 program->program = *psc_program;
4569 } else {
4570 switch ((program->arg_buffer >> 2) % 4) {
4571 case 0:
4572 if (program->support_base_instance) {
4573 if (program->increment_draw_id) {
4574 pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4575 buffer,
4576 program->arg_buffer & ~0xfull,
4577 dev_info);
4578 pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4579 buffer,
4580 program->index_list_addr_buffer + 4);
4581 pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4582 buffer,
4583 program->index_list_addr_buffer);
4584 pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4585 buffer,
4586 program->num_views);
4587 pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4588 buffer);
4589 } else {
4590 pvr_write_draw_indirect_arrays_base_instance0_di_data(
4591 buffer,
4592 program->arg_buffer & ~0xfull,
4593 dev_info);
4594 pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4595 buffer,
4596 program->index_list_addr_buffer + 4);
4597 pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4598 buffer,
4599 program->index_list_addr_buffer);
4600 pvr_write_draw_indirect_arrays_base_instance0_num_views(
4601 buffer,
4602 program->num_views);
4603 pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4604 }
4605 } else {
4606 pvr_write_draw_indirect_arrays0_di_data(buffer,
4607 program->arg_buffer &
4608 ~0xfull,
4609 dev_info);
4610 pvr_write_draw_indirect_arrays0_write_vdm(
4611 buffer,
4612 program->index_list_addr_buffer + 4);
4613 pvr_write_draw_indirect_arrays0_flush_vdm(
4614 buffer,
4615 program->index_list_addr_buffer);
4616 pvr_write_draw_indirect_arrays0_num_views(buffer,
4617 program->num_views);
4618 pvr_write_draw_indirect_arrays0_immediates(buffer);
4619 }
4620 break;
4621 case 1:
4622 if (program->support_base_instance) {
4623 if (program->increment_draw_id) {
4624 pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4625 buffer,
4626 program->arg_buffer & ~0xfull,
4627 dev_info);
4628 pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4629 buffer,
4630 program->index_list_addr_buffer + 4);
4631 pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4632 buffer,
4633 program->index_list_addr_buffer);
4634 pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4635 buffer,
4636 program->num_views);
4637 pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4638 buffer);
4639 } else {
4640 pvr_write_draw_indirect_arrays_base_instance1_di_data(
4641 buffer,
4642 program->arg_buffer & ~0xfull,
4643 dev_info);
4644 pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4645 buffer,
4646 program->index_list_addr_buffer + 4);
4647 pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4648 buffer,
4649 program->index_list_addr_buffer);
4650 pvr_write_draw_indirect_arrays_base_instance1_num_views(
4651 buffer,
4652 program->num_views);
4653 pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4654 }
4655 } else {
4656 pvr_write_draw_indirect_arrays1_di_data(buffer,
4657 program->arg_buffer &
4658 ~0xfull,
4659 dev_info);
4660 pvr_write_draw_indirect_arrays1_write_vdm(
4661 buffer,
4662 program->index_list_addr_buffer + 4);
4663 pvr_write_draw_indirect_arrays1_flush_vdm(
4664 buffer,
4665 program->index_list_addr_buffer);
4666 pvr_write_draw_indirect_arrays1_num_views(buffer,
4667 program->num_views);
4668 pvr_write_draw_indirect_arrays1_immediates(buffer);
4669 }
4670 break;
4671 case 2:
4672 if (program->support_base_instance) {
4673 if (program->increment_draw_id) {
4674 pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4675 buffer,
4676 program->arg_buffer & ~0xfull,
4677 dev_info);
4678 pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4679 buffer,
4680 program->index_list_addr_buffer + 4);
4681 pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4682 buffer,
4683 program->index_list_addr_buffer);
4684 pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4685 buffer,
4686 program->num_views);
4687 pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4688 buffer);
4689 } else {
4690 pvr_write_draw_indirect_arrays_base_instance2_di_data(
4691 buffer,
4692 program->arg_buffer & ~0xfull,
4693 dev_info);
4694 pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4695 buffer,
4696 program->index_list_addr_buffer + 4);
4697 pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4698 buffer,
4699 program->index_list_addr_buffer);
4700 pvr_write_draw_indirect_arrays_base_instance2_num_views(
4701 buffer,
4702 program->num_views);
4703 pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4704 }
4705 } else {
4706 pvr_write_draw_indirect_arrays2_di_data(buffer,
4707 program->arg_buffer &
4708 ~0xfull,
4709 dev_info);
4710 pvr_write_draw_indirect_arrays2_write_vdm(
4711 buffer,
4712 program->index_list_addr_buffer + 4);
4713 pvr_write_draw_indirect_arrays2_flush_vdm(
4714 buffer,
4715 program->index_list_addr_buffer);
4716 pvr_write_draw_indirect_arrays2_num_views(buffer,
4717 program->num_views);
4718 pvr_write_draw_indirect_arrays2_immediates(buffer);
4719 }
4720 break;
4721 case 3:
4722 if (program->support_base_instance) {
4723 if (program->increment_draw_id) {
4724 pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4725 buffer,
4726 program->arg_buffer & ~0xfull,
4727 dev_info);
4728 pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4729 buffer,
4730 program->index_list_addr_buffer + 4);
4731 pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4732 buffer,
4733 program->index_list_addr_buffer);
4734 pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4735 buffer,
4736 program->num_views);
4737 pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4738 buffer);
4739 } else {
4740 pvr_write_draw_indirect_arrays_base_instance3_di_data(
4741 buffer,
4742 program->arg_buffer & ~0xfull,
4743 dev_info);
4744 pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4745 buffer,
4746 program->index_list_addr_buffer + 4);
4747 pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4748 buffer,
4749 program->index_list_addr_buffer);
4750 pvr_write_draw_indirect_arrays_base_instance3_num_views(
4751 buffer,
4752 program->num_views);
4753 pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4754 }
4755 } else {
4756 pvr_write_draw_indirect_arrays3_di_data(buffer,
4757 program->arg_buffer &
4758 ~0xfull,
4759 dev_info);
4760 pvr_write_draw_indirect_arrays3_write_vdm(
4761 buffer,
4762 program->index_list_addr_buffer + 4);
4763 pvr_write_draw_indirect_arrays3_flush_vdm(
4764 buffer,
4765 program->index_list_addr_buffer);
4766 pvr_write_draw_indirect_arrays3_num_views(buffer,
4767 program->num_views);
4768 pvr_write_draw_indirect_arrays3_immediates(buffer);
4769 }
4770 break;
4771 }
4772 }
4773 }
4774
4775 #include "pvr_draw_indirect_elements0.h"
4776 #include "pvr_draw_indirect_elements1.h"
4777 #include "pvr_draw_indirect_elements2.h"
4778 #include "pvr_draw_indirect_elements3.h"
4779 #include "pvr_draw_indirect_elements_base_instance0.h"
4780 #include "pvr_draw_indirect_elements_base_instance1.h"
4781 #include "pvr_draw_indirect_elements_base_instance2.h"
4782 #include "pvr_draw_indirect_elements_base_instance3.h"
4783 #include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4784 #include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4785 #include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4786 #include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4787
pvr_pds_generate_draw_elements_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4788 void pvr_pds_generate_draw_elements_indirect(
4789 struct pvr_pds_drawindirect_program *restrict program,
4790 uint32_t *restrict buffer,
4791 enum pvr_pds_generate_mode gen_mode,
4792 const struct pvr_device_info *dev_info)
4793 {
4794 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4795 (gen_mode == PDS_GENERATE_SIZES)) {
4796 const struct pvr_psc_program_output *psc_program = NULL;
4797 switch ((program->arg_buffer >> 2) % 4) {
4798 case 0:
4799 if (program->support_base_instance) {
4800 if (program->increment_draw_id) {
4801 psc_program =
4802 &pvr_draw_indirect_elements_base_instance_drawid0_program;
4803 } else {
4804 psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4805 }
4806 } else {
4807 psc_program = &pvr_draw_indirect_elements0_program;
4808 }
4809 break;
4810 case 1:
4811 if (program->support_base_instance) {
4812 if (program->increment_draw_id) {
4813 psc_program =
4814 &pvr_draw_indirect_elements_base_instance_drawid1_program;
4815 } else {
4816 psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4817 }
4818 } else {
4819 psc_program = &pvr_draw_indirect_elements1_program;
4820 }
4821 break;
4822 case 2:
4823 if (program->support_base_instance) {
4824 if (program->increment_draw_id) {
4825 psc_program =
4826 &pvr_draw_indirect_elements_base_instance_drawid2_program;
4827 } else {
4828 psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4829 }
4830 } else {
4831 psc_program = &pvr_draw_indirect_elements2_program;
4832 }
4833 break;
4834 case 3:
4835 if (program->support_base_instance) {
4836 if (program->increment_draw_id) {
4837 psc_program =
4838 &pvr_draw_indirect_elements_base_instance_drawid3_program;
4839 } else {
4840 psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4841 }
4842 } else {
4843 psc_program = &pvr_draw_indirect_elements3_program;
4844 }
4845 break;
4846 }
4847
4848 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4849 memcpy(buffer,
4850 psc_program->code,
4851 psc_program->code_size * sizeof(uint32_t));
4852
4853 #if defined(DUMP_PDS)
4854 for (uint32_t i = 0; i < psc_program->code_size; i++)
4855 PVR_PDS_PRINT_INST(buffer[i]);
4856 #endif
4857 }
4858
4859 program->program = *psc_program;
4860 } else {
4861 switch ((program->arg_buffer >> 2) % 4) {
4862 case 0:
4863 if (program->support_base_instance) {
4864 if (program->increment_draw_id) {
4865 pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4866 buffer,
4867 program->arg_buffer & ~0xfull,
4868 dev_info);
4869 pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4870 buffer,
4871 program->index_list_addr_buffer);
4872 pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4873 buffer,
4874 program->index_list_addr_buffer);
4875 pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4876 buffer,
4877 program->num_views);
4878 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4879 buffer,
4880 program->index_stride);
4881 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4882 buffer,
4883 program->index_buffer);
4884 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4885 buffer,
4886 program->index_block_header);
4887 pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4888 buffer);
4889 } else {
4890 pvr_write_draw_indirect_elements_base_instance0_di_data(
4891 buffer,
4892 program->arg_buffer & ~0xfull,
4893 dev_info);
4894 pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4895 buffer,
4896 program->index_list_addr_buffer);
4897 pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4898 buffer,
4899 program->index_list_addr_buffer);
4900 pvr_write_draw_indirect_elements_base_instance0_num_views(
4901 buffer,
4902 program->num_views);
4903 pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4904 buffer,
4905 program->index_stride);
4906 pvr_write_draw_indirect_elements_base_instance0_idx_base(
4907 buffer,
4908 program->index_buffer);
4909 pvr_write_draw_indirect_elements_base_instance0_idx_header(
4910 buffer,
4911 program->index_block_header);
4912 pvr_write_draw_indirect_elements_base_instance0_immediates(
4913 buffer);
4914 }
4915 } else {
4916 pvr_write_draw_indirect_elements0_di_data(buffer,
4917 program->arg_buffer &
4918 ~0xfull,
4919 dev_info);
4920 pvr_write_draw_indirect_elements0_write_vdm(
4921 buffer,
4922 program->index_list_addr_buffer);
4923 pvr_write_draw_indirect_elements0_flush_vdm(
4924 buffer,
4925 program->index_list_addr_buffer);
4926 pvr_write_draw_indirect_elements0_num_views(buffer,
4927 program->num_views);
4928 pvr_write_draw_indirect_elements0_idx_stride(buffer,
4929 program->index_stride);
4930 pvr_write_draw_indirect_elements0_idx_base(buffer,
4931 program->index_buffer);
4932 pvr_write_draw_indirect_elements0_idx_header(
4933 buffer,
4934 program->index_block_header);
4935 pvr_write_draw_indirect_elements0_immediates(buffer);
4936 }
4937 break;
4938 case 1:
4939 if (program->support_base_instance) {
4940 if (program->increment_draw_id) {
4941 pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4942 buffer,
4943 program->arg_buffer & ~0xfull,
4944 dev_info);
4945 pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4946 buffer,
4947 program->index_list_addr_buffer);
4948 pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4949 buffer,
4950 program->index_list_addr_buffer);
4951 pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4952 buffer,
4953 program->num_views);
4954 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4955 buffer,
4956 program->index_stride);
4957 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4958 buffer,
4959 program->index_buffer);
4960 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4961 buffer,
4962 program->index_block_header);
4963 pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4964 buffer);
4965 } else {
4966 pvr_write_draw_indirect_elements_base_instance1_di_data(
4967 buffer,
4968 program->arg_buffer & ~0xfull,
4969 dev_info);
4970 pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4971 buffer,
4972 program->index_list_addr_buffer);
4973 pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4974 buffer,
4975 program->index_list_addr_buffer);
4976 pvr_write_draw_indirect_elements_base_instance1_num_views(
4977 buffer,
4978 program->num_views);
4979 pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4980 buffer,
4981 program->index_stride);
4982 pvr_write_draw_indirect_elements_base_instance1_idx_base(
4983 buffer,
4984 program->index_buffer);
4985 pvr_write_draw_indirect_elements_base_instance1_idx_header(
4986 buffer,
4987 program->index_block_header);
4988 pvr_write_draw_indirect_elements_base_instance1_immediates(
4989 buffer);
4990 }
4991 } else {
4992 pvr_write_draw_indirect_elements1_di_data(buffer,
4993 program->arg_buffer &
4994 ~0xfull,
4995 dev_info);
4996 pvr_write_draw_indirect_elements1_write_vdm(
4997 buffer,
4998 program->index_list_addr_buffer);
4999 pvr_write_draw_indirect_elements1_flush_vdm(
5000 buffer,
5001 program->index_list_addr_buffer);
5002 pvr_write_draw_indirect_elements1_num_views(buffer,
5003 program->num_views);
5004 pvr_write_draw_indirect_elements1_idx_stride(buffer,
5005 program->index_stride);
5006 pvr_write_draw_indirect_elements1_idx_base(buffer,
5007 program->index_buffer);
5008 pvr_write_draw_indirect_elements1_idx_header(
5009 buffer,
5010 program->index_block_header);
5011 pvr_write_draw_indirect_elements1_immediates(buffer);
5012 }
5013 break;
5014 case 2:
5015 if (program->support_base_instance) {
5016 if (program->increment_draw_id) {
5017 pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
5018 buffer,
5019 program->arg_buffer & ~0xfull,
5020 dev_info);
5021 pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
5022 buffer,
5023 program->index_list_addr_buffer);
5024 pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5025 buffer,
5026 program->index_list_addr_buffer);
5027 pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5028 buffer,
5029 program->num_views);
5030 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5031 buffer,
5032 program->index_stride);
5033 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5034 buffer,
5035 program->index_buffer);
5036 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5037 buffer,
5038 program->index_block_header);
5039 pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5040 buffer);
5041 } else {
5042 pvr_write_draw_indirect_elements_base_instance2_di_data(
5043 buffer,
5044 program->arg_buffer & ~0xfull,
5045 dev_info);
5046 pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5047 buffer,
5048 program->index_list_addr_buffer);
5049 pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5050 buffer,
5051 program->index_list_addr_buffer);
5052 pvr_write_draw_indirect_elements_base_instance2_num_views(
5053 buffer,
5054 program->num_views);
5055 pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5056 buffer,
5057 program->index_stride);
5058 pvr_write_draw_indirect_elements_base_instance2_idx_base(
5059 buffer,
5060 program->index_buffer);
5061 pvr_write_draw_indirect_elements_base_instance2_idx_header(
5062 buffer,
5063 program->index_block_header);
5064 pvr_write_draw_indirect_elements_base_instance2_immediates(
5065 buffer);
5066 }
5067 } else {
5068 pvr_write_draw_indirect_elements2_di_data(buffer,
5069 program->arg_buffer &
5070 ~0xfull,
5071 dev_info);
5072 pvr_write_draw_indirect_elements2_write_vdm(
5073 buffer,
5074 program->index_list_addr_buffer);
5075 pvr_write_draw_indirect_elements2_flush_vdm(
5076 buffer,
5077 program->index_list_addr_buffer);
5078 pvr_write_draw_indirect_elements2_num_views(buffer,
5079 program->num_views);
5080 pvr_write_draw_indirect_elements2_idx_stride(buffer,
5081 program->index_stride);
5082 pvr_write_draw_indirect_elements2_idx_base(buffer,
5083 program->index_buffer);
5084 pvr_write_draw_indirect_elements2_idx_header(
5085 buffer,
5086 program->index_block_header);
5087 pvr_write_draw_indirect_elements2_immediates(buffer);
5088 }
5089 break;
5090 case 3:
5091 if (program->support_base_instance) {
5092 if (program->increment_draw_id) {
5093 pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5094 buffer,
5095 program->arg_buffer & ~0xfull,
5096 dev_info);
5097 pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5098 buffer,
5099 program->index_list_addr_buffer);
5100 pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5101 buffer,
5102 program->index_list_addr_buffer);
5103 pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5104 buffer,
5105 program->num_views);
5106 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5107 buffer,
5108 program->index_stride);
5109 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5110 buffer,
5111 program->index_buffer);
5112 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5113 buffer,
5114 program->index_block_header);
5115 pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5116 buffer);
5117 } else {
5118 pvr_write_draw_indirect_elements_base_instance3_di_data(
5119 buffer,
5120 program->arg_buffer & ~0xfull,
5121 dev_info);
5122 pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5123 buffer,
5124 program->index_list_addr_buffer);
5125 pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5126 buffer,
5127 program->index_list_addr_buffer);
5128 pvr_write_draw_indirect_elements_base_instance3_num_views(
5129 buffer,
5130 program->num_views);
5131 pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5132 buffer,
5133 program->index_stride);
5134 pvr_write_draw_indirect_elements_base_instance3_idx_base(
5135 buffer,
5136 program->index_buffer);
5137 pvr_write_draw_indirect_elements_base_instance3_idx_header(
5138 buffer,
5139 program->index_block_header);
5140 pvr_write_draw_indirect_elements_base_instance3_immediates(
5141 buffer);
5142 }
5143 } else {
5144 pvr_write_draw_indirect_elements3_di_data(buffer,
5145 program->arg_buffer &
5146 ~0xfull,
5147 dev_info);
5148 pvr_write_draw_indirect_elements3_write_vdm(
5149 buffer,
5150 program->index_list_addr_buffer);
5151 pvr_write_draw_indirect_elements3_flush_vdm(
5152 buffer,
5153 program->index_list_addr_buffer);
5154 pvr_write_draw_indirect_elements3_num_views(buffer,
5155 program->num_views);
5156 pvr_write_draw_indirect_elements3_idx_stride(buffer,
5157 program->index_stride);
5158 pvr_write_draw_indirect_elements3_idx_base(buffer,
5159 program->index_buffer);
5160 pvr_write_draw_indirect_elements3_idx_header(
5161 buffer,
5162 program->index_block_header);
5163 pvr_write_draw_indirect_elements3_immediates(buffer);
5164 }
5165 break;
5166 }
5167 }
5168 }
5169