1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 * Author: Tom Stellard <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_compiler.h"
8 #include "radeon_compiler_util.h"
9 #include "radeon_dataflow.h"
10 #include "radeon_program.h"
11 #include "radeon_program_constants.h"
12
13 struct vert_fc_state {
14 struct radeon_compiler *C;
15 unsigned BranchDepth;
16 unsigned LoopDepth;
17 unsigned LoopsReserved;
18 int PredStack[R500_PVS_MAX_LOOP_DEPTH];
19 int PredicateReg;
20 };
21
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)22 static void build_pred_src(
23 struct rc_src_register * src,
24 struct vert_fc_state * fc_state)
25 {
26 src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
27 RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
28 src->File = RC_FILE_TEMPORARY;
29 src->Index = fc_state->PredicateReg;
30 }
31
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)32 static void build_pred_dst(
33 struct rc_dst_register * dst,
34 struct vert_fc_state * fc_state)
35 {
36 dst->WriteMask = RC_MASK_W;
37 dst->File = RC_FILE_TEMPORARY;
38 dst->Index = fc_state->PredicateReg;
39 }
40
/* Callback for rc_for_all_writes_mask(): accumulate the channels written to
 * each temporary register.
 *
 * userdata  array of per-register write masks, indexed by register index.
 *           The caller in this file passes an array of
 *           RC_REGISTER_MAX_INDEX entries.
 * inst      instruction performing the write (unused here).
 * file      register file being written; only RC_FILE_TEMPORARY is recorded.
 * index     index of the register being written.
 * mask      channels written by this instruction; OR-ed into the entry. */
static void mark_write(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned int * writemasks = userdata;

	if (file != RC_FILE_TEMPORARY)
		return;

	/* Bound the index by the size of the mask array rather than by
	 * R300_VS_MAX_TEMPS. The search for a free predicate register walks
	 * the array up to the compiler's max_temp_regs, so silently dropping
	 * writes to higher-numbered temporaries could make a register that is
	 * in use look free. */
	if (index >= RC_REGISTER_MAX_INDEX)
		return;

	writemasks[index] |= mask;
}
54
reserve_predicate_reg(struct vert_fc_state * fc_state)55 static int reserve_predicate_reg(struct vert_fc_state * fc_state)
56 {
57 int i;
58 unsigned int writemasks[RC_REGISTER_MAX_INDEX];
59 struct rc_instruction * inst;
60 memset(writemasks, 0, sizeof(writemasks));
61 for(inst = fc_state->C->Program.Instructions.Next;
62 inst != &fc_state->C->Program.Instructions;
63 inst = inst->Next) {
64 rc_for_all_writes_mask(inst, mark_write, writemasks);
65 }
66
67 for(i = 0; i < fc_state->C->max_temp_regs; i++) {
68 /* Most of the control flow instructions only write the
69 * W component of the Predicate Register, but
70 * the docs say that ME_PRED_SET_CLR and
71 * ME_PRED_SET_RESTORE write all components of the
72 * register, so we must reserve a register that has
73 * all its components free. */
74 if (!writemasks[i]) {
75 fc_state->PredicateReg = i;
76 break;
77 }
78 }
79 if (i == fc_state->C->max_temp_regs) {
80 rc_error(fc_state->C, "No free temporary to use for"
81 " predicate stack counter.\n");
82 return -1;
83 }
84 return 1;
85 }
86
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)87 static void lower_bgnloop(
88 struct rc_instruction * inst,
89 struct vert_fc_state * fc_state)
90 {
91 struct rc_instruction * new_inst =
92 rc_insert_new_instruction(fc_state->C, inst->Prev);
93
94 if ((!fc_state->C->is_r500
95 && fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
96 || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
97 rc_error(fc_state->C, "Loops are nested too deep.");
98 return;
99 }
100
101 if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
102 if (fc_state->PredicateReg == -1) {
103 if (reserve_predicate_reg(fc_state) == -1) {
104 return;
105 }
106 }
107
108 /* Initialize the predicate bit to true. */
109 new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
110 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
111 new_inst->U.I.SrcReg[0].Index = 0;
112 new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
113 new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
114 } else {
115 fc_state->PredStack[fc_state->LoopDepth] =
116 fc_state->PredicateReg;
117 /* Copy the current predicate value to this loop's
118 * predicate register */
119
120 /* Use the old predicate value for src0 */
121 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
122
123 /* Reserve this loop's predicate register */
124 if (reserve_predicate_reg(fc_state) == -1) {
125 return;
126 }
127
128 /* Copy the old predicate value to the new register */
129 new_inst->U.I.Opcode = RC_OPCODE_ADD;
130 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
131 new_inst->U.I.SrcReg[1].Index = 0;
132 new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
133 new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
134 }
135
136 }
137
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)138 static void lower_brk(
139 struct rc_instruction * inst,
140 struct vert_fc_state * fc_state)
141 {
142 if (fc_state->LoopDepth == 1) {
143 inst->U.I.Opcode = RC_OPCODE_RCP;
144 inst->U.I.DstReg.Pred = RC_PRED_SET;
145 inst->U.I.SrcReg[0].Index = 0;
146 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
147 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
148 } else {
149 inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
150 inst->U.I.DstReg.Pred = RC_PRED_SET;
151 }
152
153 build_pred_dst(&inst->U.I.DstReg, fc_state);
154 }
155
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)156 static void lower_endloop(
157 struct rc_instruction * inst,
158 struct vert_fc_state * fc_state)
159 {
160 struct rc_instruction * new_inst =
161 rc_insert_new_instruction(fc_state->C, inst);
162
163 new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
164 build_pred_dst(&new_inst->U.I.DstReg, fc_state);
165 /* Restore the previous predicate register. */
166 fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
167 build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
168 }
169
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)170 static void lower_if(
171 struct rc_instruction * inst,
172 struct vert_fc_state * fc_state)
173 {
174 /* Reserve a temporary to use as our predicate stack counter, if we
175 * don't already have one. */
176 if (fc_state->PredicateReg == -1) {
177 /* If we are inside a loop, the Predicate Register should
178 * have already been defined. */
179 assert(fc_state->LoopDepth == 0);
180
181 if (reserve_predicate_reg(fc_state) == -1) {
182 return;
183 }
184 }
185
186 if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
187 inst->U.I.Opcode = RC_ME_PRED_SNEQ;
188 } else {
189 unsigned swz;
190 inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
191 memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
192 sizeof(inst->U.I.SrcReg[1]));
193 swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
194 /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
195 * w component */
196 inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
197 RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
198 build_pred_src(&inst->U.I.SrcReg[0], fc_state);
199 }
200 build_pred_dst(&inst->U.I.DstReg, fc_state);
201 }
202
rc_vert_fc(struct radeon_compiler * c,void * user)203 void rc_vert_fc(struct radeon_compiler *c, void *user)
204 {
205 struct rc_instruction * inst;
206 struct vert_fc_state fc_state;
207
208 memset(&fc_state, 0, sizeof(fc_state));
209 fc_state.PredicateReg = -1;
210 fc_state.C = c;
211
212 for(inst = c->Program.Instructions.Next;
213 inst != &c->Program.Instructions;
214 inst = inst->Next) {
215
216 switch (inst->U.I.Opcode) {
217
218 case RC_OPCODE_BGNLOOP:
219 lower_bgnloop(inst, &fc_state);
220 fc_state.LoopDepth++;
221 break;
222
223 case RC_OPCODE_BRK:
224 lower_brk(inst, &fc_state);
225 break;
226
227 case RC_OPCODE_ENDLOOP:
228 if (fc_state.BranchDepth != 0
229 || fc_state.LoopDepth != 1) {
230 lower_endloop(inst, &fc_state);
231 /* Skip the new PRED_RESTORE */
232 inst = inst->Next;
233 }
234 fc_state.LoopDepth--;
235 break;
236 case RC_OPCODE_IF:
237 lower_if(inst, &fc_state);
238 fc_state.BranchDepth++;
239 break;
240
241 case RC_OPCODE_ELSE:
242 inst->U.I.Opcode = RC_ME_PRED_SET_INV;
243 build_pred_dst(&inst->U.I.DstReg, &fc_state);
244 build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
245 break;
246
247 case RC_OPCODE_ENDIF:
248 /* TODO: If LoopDepth == 1 and there is only a single break
249 * we can optimize out the endif just after the break. However
250 * previous attempts were buggy, so keep it simple for now.
251 */
252 inst->U.I.Opcode = RC_ME_PRED_SET_POP;
253 build_pred_dst(&inst->U.I.DstReg, &fc_state);
254 build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
255 fc_state.BranchDepth--;
256 break;
257
258 default:
259 if (fc_state.BranchDepth || fc_state.LoopDepth) {
260 inst->U.I.DstReg.Pred = RC_PRED_SET;
261 }
262 break;
263 }
264
265 if (c->Error) {
266 return;
267 }
268 }
269 }
270